-
Notifications
You must be signed in to change notification settings - Fork 228
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[perf] cache nix.searchSystem #1546
Changes from 5 commits
503f884
54d95ad
b2fb532
e393a99
0dbfe03
314d249
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,10 +5,14 @@ import ( | |
"fmt" | ||
"os" | ||
"os/exec" | ||
"regexp" | ||
"strings" | ||
"time" | ||
|
||
"github.com/pkg/errors" | ||
"go.jetpack.io/devbox/internal/debug" | ||
"go.jetpack.io/devbox/internal/xdg" | ||
"go.jetpack.io/pkg/filecache" | ||
) | ||
|
||
var ( | ||
|
@@ -19,10 +23,10 @@ var ( | |
type Info struct { | ||
// attribute key is different in flakes vs legacy so we should only use it | ||
// if we know exactly which version we are using | ||
AttributeKey string | ||
PName string | ||
Summary string | ||
Version string | ||
AttributeKey string `json:"attribute"` | ||
PName string `json:"pname"` | ||
Summary string `json:"summary"` | ||
Version string `json:"version"` | ||
} | ||
|
||
func (i *Info) String() string { | ||
|
@@ -31,10 +35,11 @@ func (i *Info) String() string { | |
|
||
func Search(url string) (map[string]*Info, error) { | ||
if strings.HasPrefix(url, "runx:") { | ||
// TODO implement runx search | ||
// TODO implement runx search. Also, move this check outside this function: nix package | ||
// should not be handling runx logic. | ||
return map[string]*Info{}, nil | ||
} | ||
return searchSystem(url, "") | ||
return searchSystem(url, "" /* system */) | ||
} | ||
|
||
func parseSearchResults(data []byte) map[string]*Info { | ||
|
@@ -106,3 +111,88 @@ func searchSystem(url, system string) (map[string]*Info, error) { | |
} | ||
return parseSearchResults(out), nil | ||
} | ||
|
||
type CachedSearchResult struct { | ||
Results map[string]*Info `json:"results"` | ||
// Query is added easier to grep for debuggability | ||
Query string `json:"query"` | ||
} | ||
savil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
var allowableQuery = regexp.MustCompile("^github:NixOS/nixpkgs/[0-9a-f]{40}#[^#]+$") | ||
|
||
func isAllowableQuery(query string) bool { | ||
savil marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return allowableQuery.MatchString(query) | ||
} | ||
|
||
// SearchNixpkgsAttribute is a wrapper around searchSystem that caches results. | ||
// NOTE: we should be very conservative in where we use this function. `nix search` | ||
// accepts generalized `installable regex` as arguments but is slow. For certain | ||
// queries of the form `nixpkgs/<commit-hash>#attribute`, we can know for sure that | ||
// once `nix search` returns a valid result, it will always be the very same result. | ||
// Hence we can cache it locally and answer future queries fast, by not calling `nix search`. | ||
func SearchNixpkgsAttribute(query string) (map[string]*Info, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the reasons explained in the comment. I wanted to keep it specific to search queries I have high confidence should always resolve to the very same result. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see my comment above about flake-reference queries not being suitable to be cached. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we throw in a regex to verify that the input query is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good call, done |
||
if !isAllowableQuery(query) { | ||
return nil, errors.Errorf("invalid query: %s, must match regex: %s", query, allowableQuery) | ||
} | ||
|
||
key := cacheKey(query) | ||
|
||
// Check if the query was already cached, and return the result if so | ||
cache := filecache.New("devbox/nix", filecache.WithCacheDir(xdg.CacheSubpath(""))) | ||
if cachedResults, err := cache.Get(key); err == nil { | ||
var results map[string]*Info | ||
if err := json.Unmarshal(cachedResults, &results); err != nil { | ||
return nil, err | ||
} | ||
return results, nil | ||
} else if !filecacheNeedsUpdate(err) { | ||
return nil, err // genuine error | ||
} | ||
|
||
// If not cached, or an update is needed, then call searchSystem | ||
infos, err := searchSystem(query, "" /*system*/) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// Save the results to the cache | ||
marshalled, err := json.Marshal(infos) | ||
if err != nil { | ||
return nil, err | ||
} | ||
// TODO savil: add a SetForever API that does not expire. Time based expiration is not needed here | ||
// because we're caching results that are guaranteed to be stable. | ||
// TODO savil: Make filecache.cache a public struct so it can be passed into other functions | ||
const oneYear = 12 * 30 * 24 * time.Hour | ||
if err := cache.Set(key, marshalled, oneYear); err != nil { | ||
return nil, err | ||
} | ||
|
||
return infos, nil | ||
} | ||
|
||
// read as: filecache.NeedsUpdate(err) | ||
// TODO savil: this should be implemented in the filecache package | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agree! |
||
func filecacheNeedsUpdate(err error) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fwiw I would just name this Maybe in filecache, using new so and then here we just check if it is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, that sgtm There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. named it as: |
||
return errors.Is(err, filecache.NotFound) || errors.Is(err, filecache.Expired) | ||
} | ||
|
||
// cacheKey sanitizes the search query to be a valid unix filename. | ||
// This cache key is used as the filename to store the cache value, and having a | ||
// representation of the query is important for debuggability. | ||
func cacheKey(query string) string { | ||
// Replace disallowed characters with underscores. | ||
re := regexp.MustCompile(`[:/#@+]`) | ||
sanitized := re.ReplaceAllString(query, "_") | ||
|
||
// Remove any remaining invalid characters. | ||
sanitized = regexp.MustCompile(`[^\w\.-]`).ReplaceAllString(sanitized, "") | ||
|
||
// Ensure the filename doesn't exceed the maximum length. | ||
const maxLen = 255 | ||
if len(sanitized) > maxLen { | ||
sanitized = sanitized[:maxLen] | ||
} | ||
|
||
return sanitized | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package nix | ||
|
||
import ( | ||
"testing" | ||
) | ||
|
||
func TestSearchCacheKey(t *testing.T) { | ||
testCases := []struct { | ||
in string | ||
out string | ||
}{ | ||
{ | ||
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19", | ||
"github_NixOS_nixpkgs_8670e496ffd093b60e74e7fa53526aa5920d09eb_go_1_19", | ||
}, | ||
{ | ||
"github:nixos/nixpkgs/7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11#emacsPackages.@", | ||
"github_nixos_nixpkgs_7d0ed7f2e5aea07ab22ccb338d27fbe347ed2f11_emacsPackages._", | ||
}, | ||
} | ||
|
||
for _, testCase := range testCases { | ||
t.Run(testCase.out, func(t *testing.T) { | ||
out := cacheKey(testCase.in) | ||
if out != testCase.out { | ||
t.Errorf("got %s, want %s", out, testCase.out) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func TestIsAllowableQuery(t *testing.T) { | ||
testCases := []struct { | ||
in string | ||
expected bool | ||
}{ | ||
{ | ||
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#go_1_19", | ||
true, | ||
}, | ||
{ | ||
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb", | ||
false, | ||
}, | ||
{ | ||
"github:NixOS/nixpkgs/8670e496ffd093b60e74e7fa53526aa5920d09eb#", | ||
false, | ||
}, | ||
{ | ||
"github:NixOS/nixpkgs/nixpkgs-unstable#go_1_19", | ||
false, | ||
}, | ||
} | ||
for _, testCase := range testCases { | ||
t.Run(testCase.in, func(t *testing.T) { | ||
out := isAllowableQuery(testCase.in) | ||
if out != testCase.expected { | ||
t.Errorf("got %t, want %t", out, testCase.expected) | ||
} | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice! this feels really understandable to me.