Skip to content

Commit

Permalink
Refactored mirrorDB logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Focshole authored and anatol committed Dec 27, 2021
1 parent 5849d0d commit ea1978e
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 141 deletions.
13 changes: 9 additions & 4 deletions pacoloco.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ var (
)

// force resources prefetching
func prefetchRequest(url string) (err error) {
func prefetchRequest(url string, optionalCustomPath string) (err error) {
urlPath := url
matches := pathRegex.FindStringSubmatch(urlPath)
if len(matches) == 0 {
Expand All @@ -182,7 +182,12 @@ func prefetchRequest(url string) (err error) {
return err
}
}
filePath := filepath.Join(cachePath, fileName)
var filePath string
if optionalCustomPath != "" {
filePath = filepath.Join(optionalCustomPath, fileName)
} else {
filePath = filepath.Join(cachePath, fileName)
}
// mandatory update when prefetching,

mutexKey := repoName + ":" + fileName
Expand Down Expand Up @@ -294,7 +299,7 @@ func handleRequest(w http.ResponseWriter, req *http.Request) error {
if err == nil && config.Prefetch != nil && !strings.HasSuffix(fileName, ".sig") && !strings.HasSuffix(fileName, ".db") {
updateDBRequestedFile(repoName, fileName) // update info for prefetching
} else if err == nil && config.Prefetch != nil && strings.HasSuffix(fileName, ".db") {
addDBfileToDB(repo.URL+path+"/"+fileName, repoName)
updateDBRequestedDB(repoName, path, fileName)
}
} else {
for _, url := range repo.URLs {
Expand All @@ -303,7 +308,7 @@ func handleRequest(w http.ResponseWriter, req *http.Request) error {
if config.Prefetch != nil && !strings.HasSuffix(fileName, ".sig") && !strings.HasSuffix(fileName, ".db") {
updateDBRequestedFile(repoName, fileName) // update info for prefetching
} else if err == nil && config.Prefetch != nil && strings.HasSuffix(fileName, ".db") {
addDBfileToDB(url+path+"/"+fileName, repoName)
updateDBRequestedDB(repoName, path, fileName)
}
break
}
Expand Down
24 changes: 5 additions & 19 deletions prefetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,26 +196,12 @@ func cleanPrefetchDB() {
for _, pkgToDel := range deadPkgs {
purgePkgIfExists(&pkgToDel)
}
// delete mirror links which does not exist on the config file
// delete mirror links which do not exist in the config file or are invalid
mirrors := getAllMirrorsDB()
for _, mirror := range mirrors {
if repoLinks, exists := config.Repos[mirror.RepoName]; exists {
var URLs []string
if repoLinks.URL != "" {
URLs = append(URLs, repoLinks.URL)
} else {
URLs = repoLinks.URLs
}
// compare the mirror URL with the URLs in the config file
found := false
for _, URL := range URLs {
if strings.Contains(mirror.URL, URL) {
found = true
break
}
}
if !found {
log.Printf("Deleting %v, mirror not found on config file", mirror.URL)
if _, exists := config.Repos[mirror.RepoName]; exists {
if strings.Index(mirror.URL, "/repo/") != 0 {
log.Printf("warning: deleting %v link due to migrating to a newer version of pacoloco. Simply do 'pacman -Sy' on repo %v to fix the prefetching.", mirror.URL, mirror.RepoName)
deleteMirrorDBFromDB(mirror)
}

Expand Down Expand Up @@ -244,7 +230,7 @@ func prefetchAllPkgs() {
urls := getPkgToUpdateDownloadURLs(p)
var failed []string
for _, url := range urls {
if err := prefetchRequest(url); err == nil {
if err := prefetchRequest(url, ""); err == nil {
purgePkgIfExists(&pkg) // delete the old package
if strings.HasSuffix(url, ".sig") {
log.Printf("Successfully prefetched %v-%v signature\n", p.PackageName, p.Arch)
Expand Down
20 changes: 13 additions & 7 deletions prefetch_db.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ func getAllPackagePaths(pkg Package) []string {
return pkgPaths
}

// MirrorDB is a struct which describes a ".db" link from a mirror.
// It is quite hard to know where db files are, so i'll store them when they are requested
// MirrorDB is a struct which stores all the relevant information about a db file requested by a client.
// Pacoloco uses this information to replicate the same request when it has to prefetch updated DB files from upstream mirrors.
// I assume the other files to download are on the same path of the DB
type MirrorDB struct {
URL string `gorm:"primaryKey;not null"`
Expand Down Expand Up @@ -129,9 +129,14 @@ func getAndDropUnusedPackages(period time.Duration) []Package {
return unusedPkgs
}

// Returns unused db files and removes them from the db
// Removes from the db the entries of unused db files and the entries of repos which do not exist in the config
func dropUnusedDBFiles(olderThan time.Time) {
	// Pass 1: delete every MirrorDB row whose last download time precedes the cutoff.
	// The result of Delete is not inspected.
	prefetchDB.
		Model(&MirrorDB{}).
		Unscoped().
		Where("mirror_dbs.last_time_downloaded < ?", olderThan).
		Delete(&MirrorDB{})

	// Collect the names of all repos currently present in the config.
	configured := make([]string, len(config.Repos))
	idx := 0
	for name := range config.Repos {
		configured[idx] = name
		idx++
	}

	// Pass 2: delete every MirrorDB row whose repo is not among the configured ones.
	// NOTE(review): when config.Repos is empty the NOT IN list is empty too - confirm
	// how the ORM renders that case.
	prefetchDB.
		Model(&MirrorDB{}).
		Unscoped().
		Where("mirror_dbs.repo_name NOT IN ?", configured).
		Delete(&MirrorDB{})
}

// Returns dead packages and removes them from the db
Expand Down Expand Up @@ -199,13 +204,14 @@ func getPkgsToUpdate() []PkgToUpdate {
return pkgs
}

// add a complete url of a DB in a db. This urls are used to download afterwards the db to know which packages should be prefetched.
func addDBfileToDB(urlDB string, repoName string) (MirrorDB, error) {
// Adds the pacoloco URL of a DB file to the db. These URLs are used afterwards to download the DB file and find out which packages should be prefetched.
func updateDBRequestedDB(repoName string, path_ string, filename string) (MirrorDB, error) {
now := time.Now()
if prefetchDB == nil {
log.Fatalf("prefetchDB is uninitialized")
}
matches := urlRegex.FindStringSubmatch(urlDB)
urlDB := path.Join("/repo/", repoName, path_, filename)
matches := pathRegex.FindStringSubmatch(urlDB)
if len(matches) == 0 {
return MirrorDB{}, fmt.Errorf("url '%v' is invalid, cannot save it for prefetching", urlDB)
}
Expand All @@ -222,5 +228,5 @@ func getAllMirrorsDB() []MirrorDB {
}

// deleteMirrorDBFromDB targets the mirror_dbs rows whose url and repo_name both
// match the given MirrorDB. Nothing is returned and the query result is not inspected.
func deleteMirrorDBFromDB(m MirrorDB) {
// This statement only builds the query: it has no Delete call and its value is discarded.
// NOTE(review): presumably a no-op on its own - confirm.
prefetchDB.Model(&MirrorDB{}).Unscoped().Where("mirror_dbs.url = ? and mirror_dbs.repo_name = ?", m.URL, m.RepoName)
// Same query, this time followed by Delete so the matching rows are removed.
prefetchDB.Model(&MirrorDB{}).Unscoped().Where("mirror_dbs.url = ? and mirror_dbs.repo_name = ?", m.URL, m.RepoName).Delete(&MirrorDB{})
}
16 changes: 11 additions & 5 deletions prefetch_db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,19 +187,25 @@ func TestDropUnusedDBFiles(t *testing.T) {
testSetupHelper(t)
setupPrefetch()
oneMonthAgo := time.Now().AddDate(0, -1, 0)
if _, err := addDBfileToDB("test.db", "foo"); err == nil {
t.Errorf("Should have raised an error cause url is invalid")
// must be dropped because there is no repo called foo in testSetupHelper
if _, err := updateDBRequestedDB("foo", "/url/", "test2.db"); err != nil {
t.Error(err)
}
if _, err := addDBfileToDB("http://example.com/valid//url/test.db", "foo"); err != nil {
t.Errorf("Should have raised no error but got error %v", err)
// must not be dropped because there is a repo called example in testSetupHelper
if _, err := updateDBRequestedDB("example", "/url/", "test.db"); err != nil {
t.Error(err)
}
dropUnusedDBFiles(oneMonthAgo)
dbs := getAllMirrorsDB()
if len(dbs) != 1 {
t.Errorf("The db should contain %d entries, but it contains %d", 1, len(dbs))
}
var mirr MirrorDB
prefetchDB.Model(&MirrorDB{}).Where("mirror_dbs.url = ? and mirror_dbs.repo_name = ?", "http://example.com/valid//url/test.db", "foo").First(&mirr)
prefetchDB.Model(&MirrorDB{}).Where("mirror_dbs.url = ? and mirror_dbs.repo_name = ?", "/repo/example/url/test.db", "example").First(&mirr)
matches := pathRegex.FindStringSubmatch(mirr.URL)
if len(matches) == 0 {
t.Errorf("It should be a proper pacoloco path url")
}
twoMonthsAgo := time.Now().AddDate(0, -2, 0)
mirr.LastTimeDownloaded = &twoMonthsAgo
if db := prefetchDB.Save(&mirr); db.Error != nil {
Expand Down
20 changes: 10 additions & 10 deletions prefetch_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ func TestPacolocoPrefetchIntegration(t *testing.T) {
}

func testPrefetchInvalidURL(t *testing.T) {
if err := prefetchRequest("/foo"); err == nil {
if err := prefetchRequest("/foo", ""); err == nil {
t.Error("Error expected")
}
}

func testPrefetchRequestNonExistingDb(t *testing.T) {
// Requesting non-existing repo
if err := prefetchRequest("/repo/test/test.db"); err == nil {
if err := prefetchRequest("/repo/test/test.db", ""); err == nil {
t.Error("Error expected")
}

Expand All @@ -70,7 +70,7 @@ func testPrefetchRequestExistingRepo(t *testing.T) {
config.Repos["repo1"] = Repo{}
defer delete(config.Repos, "repo1")

if err := prefetchRequest("/repo/repo1/test.db"); err == nil {
if err := prefetchRequest("/repo/repo1/test.db", ""); err == nil {
t.Error("Error expected")
}

Expand Down Expand Up @@ -101,7 +101,7 @@ func testPrefetchRequestPackageFile(t *testing.T) {
pkgModTime := time.Now().Add(-time.Hour)
os.Chtimes(pkgAtMirror, pkgModTime, pkgModTime)

err := prefetchRequest("/repo/repo3/test-1-1-any.pkg.tar.zst")
err := prefetchRequest("/repo/repo3/test-1-1-any.pkg.tar.zst", "")

defer os.RemoveAll(path.Join(testPacolocoDir, "pkgs", "repo3")) // remove cached content

Expand All @@ -125,7 +125,7 @@ func testPrefetchRequestPackageFile(t *testing.T) {
newDbModTime := time.Now()
os.Chtimes(pkgAtMirror, newDbModTime, newDbModTime)

err = prefetchRequest("/repo/repo3/test-1-1-any.pkg.tar.zst")
err = prefetchRequest("/repo/repo3/test-1-1-any.pkg.tar.zst", "")

if err != nil {
t.Errorf("Expected success, got %v", err)
Expand Down Expand Up @@ -157,7 +157,7 @@ func testPrefetchFailover(t *testing.T) {
t.Fatal(err)
}

err := prefetchRequest("/repo/failover/test-1-1-any.pkg.tar.zst")
err := prefetchRequest("/repo/failover/test-1-1-any.pkg.tar.zst", "")

defer os.RemoveAll(path.Join(testPacolocoDir, "pkgs", "failover")) // remove cached content

Expand Down Expand Up @@ -187,7 +187,7 @@ func testPrefetchRealDB(t *testing.T) {

dbAtMirror := path.Join(mirrorDir, "mirror2", "test.db")
createDbTarball(dbAtMirror, getTestTarDB())
mirror, err := addDBfileToDB(config.Repos["repo2"].URL+"/test.db", "repo2")
mirror, err := updateDBRequestedDB("repo2", "", "/test.db")
if err != nil {
t.Errorf("This shouldn't fail. Error: %v", err)
}
Expand Down Expand Up @@ -219,7 +219,7 @@ func testPrefetchRequestExistingRepoWithDb(t *testing.T) {
dbModTime := time.Now().Add(-time.Hour)
os.Chtimes(dbAtMirror, dbModTime, dbModTime)

err := prefetchRequest("/repo/repo2/test.db")
err := prefetchRequest("/repo/repo2/test.db", "")

if err != nil {
t.Errorf("Expected success, got %v", err)
Expand Down Expand Up @@ -250,7 +250,7 @@ func testPrefetchRequestExistingRepoWithDb(t *testing.T) {
newDbModTime := time.Now()
os.Chtimes(dbAtMirror, newDbModTime, newDbModTime)

prefetchRequest("/repo/repo2/test.db")
prefetchRequest("/repo/repo2/test.db", "")
if err != nil {
t.Errorf("Expected success, got %v", err)
}
Expand Down Expand Up @@ -281,7 +281,7 @@ func testIntegrationPrefetchAllPkgs(t *testing.T) {
dbAtMirror := path.Join(mirrorDir, "mirror3", "test.db")
createDbTarball(dbAtMirror, getTestTarDB())
// fake a request to the db
if _, err := addDBfileToDB(config.Repos["repo3"].URL+"/test.db", "repo3"); err != nil {
if _, err := updateDBRequestedDB("repo3", "", "/test.db"); err != nil {
t.Errorf("Should not generate errors, but got %v", err)
}
// now add a fake older version of a package which is in the db
Expand Down
69 changes: 12 additions & 57 deletions repo_db_mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"path"
"path/filepath"
"strings"
"time"
)

// Uncompresses a gzip file
Expand Down Expand Up @@ -84,87 +83,43 @@ func extractFilenamesFromTar(filePath string) ([]string, error) {
}

// This function returns a URL which, when sent to pacoloco, should download exactly the same pkg, except for the file extension
func getPacolocoURL(pkg Package, prefix string) string {
return strings.ReplaceAll(("/repo/" + pkg.RepoName + "/" + prefix + "/" + pkg.PackageName + "-" + pkg.Version + "-" + pkg.Arch), "//", "/")
func getPacolocoURL(pkg Package, prefixPath string) string {
return strings.ReplaceAll(("/repo/" + pkg.RepoName + "/" + prefixPath + "/" + pkg.PackageName + "-" + pkg.Version + "-" + pkg.Arch), "//", "/")
}

// Builds a mirror package
// It requires the prefix, which is the relative path in which the db is contained
func buildMirrorPkg(fileName string, repoName string, prefix string) (MirrorPackage, error) {
func buildMirrorPkg(fileName string, repoName string, prefixPath string) (MirrorPackage, error) {
matches := filenameRegex.FindStringSubmatch(fileName)
if len(matches) >= 7 {
packageName := matches[1]
version := matches[2]
arch := matches[3]
ext := matches[5]
pkg := Package{PackageName: packageName, Version: version, Arch: arch, RepoName: repoName}
pacolocoURL := getPacolocoURL(pkg, prefix)
pacolocoURL := getPacolocoURL(pkg, prefixPath)
return MirrorPackage{PackageName: packageName, Version: version, Arch: arch, DownloadURL: pacolocoURL, RepoName: repoName, FileExt: ext}, nil
}
return MirrorPackage{}, fmt.Errorf("filename %v does not match regex, matches length is %d", fileName, len(matches))
}

// getPrefixFromMirrorDB yields the "path" portion of a mirror db URL, i.e. the part
// that follows the base URL configured for the mirror's repo. For instance, from
// https://mirror.example.com/mirror/packages/archlinux//extra/os/x86_64/extra.db
// it extracts /extra/os/x86_64
// An error is returned when the repo is not present in the config, or when none of
// the repo's configured URLs occurs inside the mirror URL.
func getPrefixFromMirrorDB(mirror MirrorDB) (string, error) {
	repoCfg, known := config.Repos[mirror.RepoName]
	if !known {
		// The mirror link is a leftover from an older config
		return "", fmt.Errorf("error: Mirror link %v is associated with repo %v which does not exist in config", mirror.URL, mirror.RepoName)
	}

	// A non-empty single URL is used alone; otherwise the URLs list is used.
	baseURLs := []string{repoCfg.URL}
	if repoCfg.URL == "" {
		baseURLs = repoCfg.URLs
	}

	for _, base := range baseURLs {
		pieces := strings.Split(mirror.URL, base)
		if len(pieces) < 2 {
			// the base URL does not occur in the mirror URL, try the next one
			continue
		}
		found := mirrorDBRegex.FindStringSubmatch(pieces[1])
		switch {
		case len(found) == 0:
			// The path is empty, e.g. //extra.db or extra.db
			return "", nil
		case strings.HasPrefix(found[0], "/"):
			return found[0], nil
		default:
			// make sure the returned prefix starts with a slash
			return "/" + found[0], nil
		}
	}
	return "", fmt.Errorf("error: Mirror link %v does not exist in repo %v", mirror.URL, mirror.RepoName)
}

// Downloads the db from the mirror and adds MirrorPackages
func downloadAndParseDb(mirror MirrorDB) error {
matches := urlRegex.FindStringSubmatch(mirror.URL)
if len(matches) == 0 {
return fmt.Errorf("url '%v' is invalid, does not match path regex", mirror.URL)
}
prefix, err := getPrefixFromMirrorDB(mirror)
if err != nil {
// If a mirror is invalid, don't download & load it
return err
}

fileName := matches[4]
// create directory if it does not exist
tmpDir := path.Join(config.CacheDir, "tmp-db")
if _, err := os.Stat(tmpDir); os.IsNotExist(err) {
if err := os.MkdirAll(tmpDir, os.ModePerm); err != nil {
return err
}
}

filePath := filepath.Join(config.CacheDir, "tmp-db", fileName)
ifModifiedSince := time.Time{}
matches := pathRegex.FindStringSubmatch(mirror.URL)
if len(matches) == 0 {
return fmt.Errorf("url '%v' is invalid, does not match path regex", mirror.URL)
}
fileName := matches[3]
filePath := filepath.Join(tmpDir, fileName)
// download the db file
if _, err := downloadFile(mirror.URL, filePath, ifModifiedSince); err != nil {
if err := prefetchRequest(mirror.URL, tmpDir); err != nil {
return err
}
log.Printf("Extracting %v...", filePath)
Expand All @@ -188,7 +143,7 @@ func downloadAndParseDb(mirror MirrorDB) error {
log.Printf("Adding entries to db...")
var repoList []MirrorPackage
for _, fileName := range fileList {
rpkg, err := buildMirrorPkg(fileName, mirror.RepoName, prefix)
rpkg, err := buildMirrorPkg(fileName, mirror.RepoName, matches[2])
if err != nil {
// If a repo package has an invalid name
// e.g. is not a repo package, maybe it is a src package or whatever, we skip it
Expand Down
Loading

0 comments on commit ea1978e

Please sign in to comment.