Skip to content

Commit

Permalink
pack-objects: allow --path-walk with --shallow (#699)
Browse files Browse the repository at this point in the history
This pull request aims to correct a pretty big issue when dealing with
UNINTERESTING objects in the path-walk API. They somehow were only
exposed when trying to perform a push from a shallow clone.

This will require rewriting the upstream version so this is avoided from
the start, but we can do a forward fix for now.

The key issue is that the path-walk API was not walking UNINTERESTING
trees at the right time, and the way it was being done was more
complicated than it needed to be. This changes some of the way the
path-walk API works in the presence of UNINTERSTING commits, but these
are good changes to make.

I had briefly attempted to remove the use of the `edge_aggressive`
option in `struct path_walk_info` in favor of using the
`--objects-edge-aggressive` option in the revision struct. When I
started down that road, though, I somehow got myself into a bind of
things not working correctly. I backed out to this version that is
working with our test cases.

I tested this using the thin and big pack tests in `p5313` which had the
same performance as before this change.

The new change is that in a shallow clone we can get the same `git push`
improvements.

I was hung up on testing this for a long time as I wasn't getting the
same results in my shallow clone as in my regular clones. It turns out
that I had forgotten to use `--no-reuse-delta` in my test command, so it
was picking the deltas that were given by the initial clone instead of
picking new ones per the algorithm. 🤦🏻
  • Loading branch information
dscho committed Jan 1, 2025
2 parents b6c9927 + 2cd3a3f commit 04428e4
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 38 deletions.
8 changes: 8 additions & 0 deletions Documentation/technical/api-path-walk.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ better off using the revision walk API instead.
the revision walk so that the walk emits commits marked with the
`UNINTERESTING` flag.

`edge_aggressive`::
For performance reasons, usually only the boundary commits are
explored to find UNINTERESTING objects. However, in the case of
shallow clones it can be helpful to mark all trees and blobs
reachable from UNINTERESTING tip commits as UNINTERESTING. This
matches the behavior of `--objects-edge-aggressive` in the
revision API.

`pl`::
This pattern list pointer allows focusing the path-walk search to
a set of patterns, only emitting paths that match the given
Expand Down
7 changes: 2 additions & 5 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static int shallow = 0;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
Expand Down Expand Up @@ -4429,6 +4430,7 @@ static void get_object_list_path_walk(struct rev_info *revs)
* base objects.
*/
info.prune_all_uninteresting = sparse;
info.edge_aggressive = shallow;

if (walk_objects_by_path(&info))
die(_("failed to pack objects via path-walk"));
Expand Down Expand Up @@ -4630,7 +4632,6 @@ int cmd_pack_objects(int argc,
struct repository *repo UNUSED)
{
int use_internal_rev_list = 0;
int shallow = 0;
int all_progress_implied = 0;
struct strvec rp = STRVEC_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
Expand Down Expand Up @@ -4818,10 +4819,6 @@ int cmd_pack_objects(int argc,
warning(_("cannot use delta islands with --path-walk"));
path_walk = 0;
}
if (path_walk && shallow) {
warning(_("cannot use --shallow with --path-walk"));
path_walk = 0;
}
if (path_walk) {
strvec_push(&rp, "--boundary");
/*
Expand Down
60 changes: 35 additions & 25 deletions path-walk.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "trace2.h"
#include "tree.h"
#include "tree-walk.h"
#include "list-objects.h"

struct type_and_oid_list
{
Expand Down Expand Up @@ -233,6 +234,26 @@ static void clear_strmap(struct strmap *map)
strmap_init(map);
}

static struct repository *edge_repo;
static struct type_and_oid_list *edge_tree_list;

static void show_edge(struct commit *commit)
{
struct tree *t = repo_get_commit_tree(edge_repo, commit);

if (!t)
return;

if (commit->object.flags & UNINTERESTING)
t->object.flags |= UNINTERESTING;

if (t->object.flags & SEEN)
return;
t->object.flags |= SEEN;

oid_array_append(&edge_tree_list->oids, &t->object.oid);
}

/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
* call 'info->path_fn' on each discovered path.
Expand All @@ -242,7 +263,7 @@ static void clear_strmap(struct strmap *map)
int walk_objects_by_path(struct path_walk_info *info)
{
const char *root_path = "";
int ret = 0, has_uninteresting = 0;
int ret = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
Expand All @@ -254,7 +275,6 @@ int walk_objects_by_path(struct path_walk_info *info)
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};
struct oidset root_tree_set = OIDSET_INIT;

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

Expand All @@ -280,6 +300,18 @@ int walk_objects_by_path(struct path_walk_info *info)
if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));

/*
* Do an initial walk of tip commits in info->revs->commits and
* info->revs->cmdline.rev to match the standard edge-walk behavior.
*
* This is particularly important when 'edge_aggressive' is set.
*/
info->revs->edge_hint_aggressive = info->edge_aggressive;

edge_repo = info->revs->repo;
edge_tree_list = root_tree_list;
mark_edges_uninteresting(info->revs, show_edge, info->prune_all_uninteresting);

info->revs->blob_objects = info->revs->tree_objects = 0;

if (info->tags) {
Expand Down Expand Up @@ -366,17 +398,10 @@ int walk_objects_by_path(struct path_walk_info *info)
if (t->object.flags & SEEN)
continue;
t->object.flags |= SEEN;

if (!oidset_insert(&root_tree_set, oid))
oid_array_append(&root_tree_list->oids, oid);
oid_array_append(&root_tree_list->oids, oid);
} else {
warning("could not find tree %s", oid_to_hex(oid));
}

if (t && (c->object.flags & UNINTERESTING)) {
t->object.flags |= UNINTERESTING;
has_uninteresting = 1;
}
}

trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
Expand All @@ -389,21 +414,6 @@ int walk_objects_by_path(struct path_walk_info *info)
oid_array_clear(&commit_list->oids);
free(commit_list);

/*
* Before performing a DFS of our paths and emitting them as interesting,
* do a full walk of the trees to distribute the UNINTERESTING bit. Use
* the sparse algorithm if prune_all_uninteresting was set.
*/
if (has_uninteresting) {
trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
if (info->prune_all_uninteresting)
mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
else
mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
}
oidset_clear(&root_tree_set);

string_list_append(&ctx.path_stack, root_path);

trace2_region_enter("path-walk", "path-walk", info->revs->repo);
Expand Down
7 changes: 7 additions & 0 deletions path-walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ struct path_walk_info {
*/
int prune_all_uninteresting;

/**
* When 'edge_aggressive' is set, then the revision walk will use
* the '--object-edge-aggressive' option to mark even more objects
* as uninteresting.
*/
int edge_aggressive;

/**
* Specify a sparse-checkout definition to match our paths to. Do not
* walk outside of this sparse definition. If the patterns are in
Expand Down
34 changes: 34 additions & 0 deletions t/t5538-push-shallow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,38 @@ EOF
git cat-file blob $(echo 1|git hash-object --stdin) >/dev/null
)
'

test_expect_success 'push new commit from shallow clone has correct object count' '
git init origin &&
test_commit -C origin a &&
test_commit -C origin b &&
git clone --depth=1 "file://$(pwd)/origin" client &&
git -C client checkout -b topic &&
git -C client commit --allow-empty -m "empty" &&
GIT_PROGRESS_DELAY=0 git -C client push --progress origin topic 2>err &&
test_grep "Enumerating objects: 1, done." err
'

test_expect_success 'push new commit from shallow clone has good deltas' '
git init base &&
test_seq 1 999 >base/a &&
test_commit -C base initial &&
git -C base add a &&
git -C base commit -m "big a" &&
git clone --depth=1 "file://$(pwd)/base" deltas &&
git -C deltas checkout -b deltas &&
test_seq 1 1000 >deltas/a &&
git -C deltas commit -a -m "bigger a" &&
GIT_TRACE2_PERF="$(pwd)/trace.txt" \
GIT_PROGRESS_DELAY=0 git -C deltas push --progress origin deltas 2>err &&
test_grep "Enumerating objects: 5, done" err &&
# If the delta base is found, then this message uses "bytes".
# If the delta base is not found, then this message uses "KiB".
test_grep "Writing objects: .* bytes" err
'

test_done
2 changes: 2 additions & 0 deletions t/t5616-partial-clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas' '
# used as delta bases!
GIT_TRACE_PACKET="$(pwd)/trace" \
GIT_TEST_FULL_NAME_HASH=0 \
GIT_TEST_PACK_PATH_WALK=0 \
git -C client \
fetch "file://$(pwd)/server" main &&
Expand Down Expand Up @@ -556,6 +557,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
# used as delta bases!
GIT_TRACE_PACKET="$(pwd)/trace" \
GIT_TEST_FULL_NAME_HASH=0 \
GIT_TEST_PACK_PATH_WALK=0 \
git -C client \
fetch "file://$(pwd)/server" main &&
Expand Down
16 changes: 8 additions & 8 deletions t/t6601-path-walk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,13 @@ test_expect_success 'topic, not base' '
COMMIT::$(git rev-parse topic)
commits:1
TREE::$(git rev-parse topic^{tree})
TREE:left/:$(git rev-parse topic:left)
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
trees:3
BLOB:a:$(git rev-parse topic:a)
BLOB:left/b:$(git rev-parse topic:left/b)
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
BLOB:right/d:$(git rev-parse topic:right/d)
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:4
tags:0
EOF
Expand All @@ -205,10 +205,10 @@ test_expect_success 'topic, not base, only blobs' '
cat >expect <<-EOF &&
commits:0
trees:0
BLOB:a:$(git rev-parse topic:a)
BLOB:left/b:$(git rev-parse topic:left/b)
BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
BLOB:right/d:$(git rev-parse topic:right/d)
BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:4
tags:0
EOF
Expand Down Expand Up @@ -246,7 +246,7 @@ test_expect_success 'topic, not base, only trees' '
cat >expect <<-EOF &&
commits:0
TREE::$(git rev-parse topic^{tree})
TREE:left/:$(git rev-parse topic:left)
TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
trees:3
blobs:0
Expand Down

0 comments on commit 04428e4

Please sign in to comment.