Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[perf] cache nix.searchSystem #1546

Merged
merged 6 commits into from
Oct 27, 2023
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -39,7 +39,7 @@ require (
github.com/tailscale/hujson v0.0.0-20221223112325-20486734a56a
github.com/wk8/go-ordered-map/v2 v2.1.8
github.com/zealic/go2node v0.1.0
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17
golang.org/x/mod v0.12.0
golang.org/x/sync v0.3.0
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -339,8 +339,8 @@ github.com/yuin/gopher-lua v0.0.0-20190514113301-1cd887cd7036/go.mod h1:gqRgreBU
github.com/zaffka/mongodb-boltdb-mock v0.0.0-20221014194232-b4bb03fbe3a0/go.mod h1:GsDD1qsG+86MeeCG7ndi6Ei3iGthKL3wQ7PTFigDfNY=
github.com/zealic/go2node v0.1.0 h1:ofxpve08cmLJBwFdI0lPCk9jfwGWOSD+s6216x0oAaA=
github.com/zealic/go2node v0.1.0/go.mod h1:GrkFr+HctXwP7vzcU9RsgtAeJjTQ6Ud0IPCQAqpTfBg=
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e h1:GtqFrE5uUf6rXFk3zaIqIfLfykrqrB8gp6bDmOfH+2s=
go.jetpack.io/pkg v0.0.0-20231012130632-77dcd111db2e/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8 h1:xzAAUVfg9uqyJhZVc+ZDTLHtFlMhj/5St5JnVfDxbpk=
go.jetpack.io/pkg v0.0.0-20231019173032-13f60a9e32b8/go.mod h1:m49CAcLbpZttEbzoEWC1SKD+/z5mEiFdw0dQxY6AM5Q=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
25 changes: 19 additions & 6 deletions internal/devpkg/package.go
Original file line number Diff line number Diff line change
@@ -285,7 +285,7 @@ func (p *Package) NormalizedPackageAttributePath() (string, error) {
}

// normalizePackageAttributePath calls nix search to find the normalized attribute
// path. It is an expensive call (~100ms).
// path. It may be an expensive call (~100ms).
func (p *Package) normalizePackageAttributePath() (string, error) {
var query string
if p.IsDevboxPackage() {
@@ -302,11 +302,24 @@ func (p *Package) normalizePackageAttributePath() (string, error) {
query = p.String()
}

// We prefer search over just trying to parse the URL because search will
// guarantee that the package exists for the current system.
infos, err := nix.Search(query)
if err != nil {
return "", err
// We prefer nix.Search over just trying to parse the package's "URL" because
// nix.Search will guarantee that the package exists for the current system.
var infos map[string]*nix.Info
var err error
if p.IsDevboxPackage() && !p.IsRunX() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! this feels really understandable to me.

// Perf optimization: For queries of the form nixpkgs/<commit>#foo, we can
// use a nix.Search cache.
//
// This will be slow if its the first time on the user's machine that this
// query is running. Otherwise, it will be cached and fast.
if infos, err = nix.SearchNixpkgsAttribute(query); err != nil {
return "", err
}
} else {
// fallback to the slow but generalized nix.Search
if infos, err = nix.Search(query); err != nil {
return "", err
}
}

if len(infos) == 1 {
1 change: 1 addition & 0 deletions internal/nix/nix.go
Original file line number Diff line number Diff line change
@@ -44,6 +44,7 @@ type PrintDevEnvArgs struct {
// PrintDevEnv calls `nix print-dev-env -f <path>` and returns its output. The output contains
// all the environment variables and bash functions required to create a nix shell.
func (*Nix) PrintDevEnv(ctx context.Context, args *PrintDevEnvArgs) (*PrintDevEnvOut, error) {
defer debug.FunctionTimer().End()
defer trace.StartRegion(ctx, "nixPrintDevEnv").End()

var data []byte
102 changes: 96 additions & 6 deletions internal/nix/search.go
Original file line number Diff line number Diff line change
@@ -5,10 +5,14 @@ import (
"fmt"
"os"
"os/exec"
"regexp"
"strings"
"time"

"github.com/pkg/errors"
"go.jetpack.io/devbox/internal/debug"
"go.jetpack.io/devbox/internal/xdg"
"go.jetpack.io/pkg/filecache"
)

var (
@@ -19,10 +23,10 @@ var (
type Info struct {
// attribute key is different in flakes vs legacy so we should only use it
// if we know exactly which version we are using
AttributeKey string
PName string
Summary string
Version string
AttributeKey string `json:"attribute"`
PName string `json:"pname"`
Summary string `json:"summary"`
Version string `json:"version"`
}

func (i *Info) String() string {
@@ -31,10 +35,11 @@ func (i *Info) String() string {

func Search(url string) (map[string]*Info, error) {
if strings.HasPrefix(url, "runx:") {
// TODO implement runx search
// TODO implement runx search. Also, move this check outside this function: nix package
// should not be handling runx logic.
return map[string]*Info{}, nil
}
return searchSystem(url, "")
return searchSystem(url, "" /* system */)
}

func parseSearchResults(data []byte) map[string]*Info {
@@ -106,3 +111,88 @@ func searchSystem(url, system string) (map[string]*Info, error) {
}
return parseSearchResults(out), nil
}

type CachedSearchResult struct {
Results map[string]*Info `json:"results"`
// Query is added easier to grep for debuggability
Query string `json:"query"`
}
savil marked this conversation as resolved.
Show resolved Hide resolved

var allowableQuery = regexp.MustCompile("^github:NixOS/nixpkgs/[0-9a-f]{40}#[^#]+$")

func isAllowableQuery(query string) bool {
savil marked this conversation as resolved.
Show resolved Hide resolved
return allowableQuery.MatchString(query)
}

// SearchNixpkgsAttribute is a wrapper around searchSystem that caches results.
// NOTE: we should be very conservative in where we use this function. `nix search`
// accepts generalized `installable regex` as arguments but is slow. For certain
// queries of the form `nixpkgs/<commit-hash>#attribute`, we can know for sure that
// once `nix search` returns a valid result, it will always be the very same result.
// Hence we can cache it locally and answer future queries fast, by not calling `nix search`.
func SearchNixpkgsAttribute(query string) (map[string]*Info, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just CachedSearch

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the reasons explained in the comment.

I wanted to keep it specific to search queries I have high confidence should always resolve to the very same result. The nix search api accepts installable regex queries which this doesn't. Hence I chose a very specific name to indicate what scenarios this works with.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see my comment above about flake-reference queries not being suitable to be cached.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we throw in a regex to verify that the input query is ^github:NixOS/nixpkgs/[0-9a-f]{40}#? Just to make sure we don't end up caching a ref like github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good call, done

if !isAllowableQuery(query) {
return nil, errors.Errorf("invalid query: %s, must match regex: %s", query, allowableQuery)
}

key := cacheKey(query)

// Check if the query was already cached, and return the result if so
cache := filecache.New("devbox/nix", filecache.WithCacheDir(xdg.CacheSubpath("")))
if cachedResults, err := cache.Get(key); err == nil {
var results map[string]*Info
if err := json.Unmarshal(cachedResults, &results); err != nil {
return nil, err
}
return results, nil
} else if !filecacheNeedsUpdate(err) {
return nil, err // genuine error
}

// If not cached, or an update is needed, then call searchSystem
infos, err := searchSystem(query, "" /*system*/)
if err != nil {
return nil, err
}

// Save the results to the cache
marshalled, err := json.Marshal(infos)
if err != nil {
return nil, err
}
// TODO savil: add a SetForever API that does not expire. Time based expiration is not needed here
// because we're caching results that are guaranteed to be stable.
// TODO savil: Make filecache.cache a public struct so it can be passed into other functions
const oneYear = 12 * 30 * 24 * time.Hour
if err := cache.Set(key, marshalled, oneYear); err != nil {
return nil, err
}

return infos, nil
}

// read as: filecache.NeedsUpdate(err)
// TODO savil: this should be implemented in the filecache package
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agree!

func filecacheNeedsUpdate(err error) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fwiw I would just name this isNotFoundOrExpired

Maybe in filecache, using new errors.Join we can return an error that is both NotFound and also something like CacheMiss

so return errors.Join(NotFound, CacheMiss)

and then here we just check if it is CacheMiss

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, that sgtm

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

named it as: filecacheIsCacheMiss

return errors.Is(err, filecache.NotFound) || errors.Is(err, filecache.Expired)
}

// cacheKey sanitizes the search query to be a valid unix filename.
// This cache key is used as the filename to store the cache value, and having a
// representation of the query is important for debuggability.
func cacheKey(query string) string {
// Replace disallowed characters with underscores.
re := regexp.MustCompile(`[:/#@+]`)
sanitized := re.ReplaceAllString(query, "_")

// Remove any remaining invalid characters.
sanitized = regexp.MustCompile(`[^\w\.-]`).ReplaceAllString(sanitized, "")

// Ensure the filename doesn't exceed the maximum length.
const maxLen = 255
if len(sanitized) > maxLen {
sanitized = sanitized[:maxLen]
}

return sanitized
}
62 changes: 62 additions & 0 deletions internal/nix/search_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package nix

import (
"testing"
)

func TestSearchCacheKey(t *testing.T) {
testCases := []struct {
in string
out string
}{
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
"github_NixOS_nixpkgs_8670e496ffd093b60e74e7fa53526aa5920d09eb_go_1_19",
},
{
"github:nixos/nixpkgs/7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11#emacsPackages.@",
"github_nixos_nixpkgs_7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11_emacsPackages._",
},
}

for _, testCase := range testCases {
t.Run(testCase.out, func(t *testing.T) {
out := cacheKey(testCase.in)
if out != testCase.out {
t.Errorf("got %s, want %s", out, testCase.out)
}
})
}
}

func TestIsAllowableQuery(t *testing.T) {
testCases := []struct {
in string
expected bool
}{
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19",
true,
},
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb",
false,
},
{
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#",
false,
},
{
"github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19",
false,
},
}
for _, testCase := range testCases {
t.Run(testCase.in, func(t *testing.T) {
out := isAllowableQuery(testCase.in)
if out != testCase.expected {
t.Errorf("got %t, want %t", out, testCase.expected)
}
})
}
}