commit 8517c370c59976628da6168ebb6e0ae4590f74b5 from: Stefan Sperling via: Thomas Adam date: Fri Jan 24 22:02:34 2025 UTC do not delete directly referenced trees and blobs during 'gotadmin cleanup' We only considered referenced commit and tag objects during cleanup. Directly referenced trees or blobs were seen as unreferenced, and deleted during cleanup. Add test coverage for this problem and fix it. commit - e5148b503587aff015bfa9bd5db6ff142529cc10 commit + 8517c370c59976628da6168ebb6e0ae4590f74b5 blob - 1197d7902e8a188ee3b31346586a09ae40a16f43 blob + d0e57adc0ed74a6bc614e805ff6e35851221db21 --- lib/repository_admin.c +++ lib/repository_admin.c @@ -987,11 +987,19 @@ load_commit_or_tag(int *ncommits, struct got_object_id &qid->id); if (err) goto done; + tree_id = got_object_commit_get_tree_id(commit); break; case GOT_OBJ_TYPE_TAG: err = got_object_open_as_tag(&tag, repo, &qid->id); if (err) goto done; + /* tree_id will be set below */ + break; + case GOT_OBJ_TYPE_TREE: + tree_id = &qid->id; + break; + case GOT_OBJ_TYPE_BLOB: + tree_id = NULL; break; default: /* should not happen */ @@ -999,10 +1007,8 @@ load_commit_or_tag(int *ncommits, struct got_object_id goto done; } - /* Find a tree object to scan. */ - if (commit) { - tree_id = got_object_commit_get_tree_id(commit); - } else if (tag) { + if (tag) { + /* Find a tree object to scan. */ obj_type = got_object_tag_get_object_type(tag); switch (obj_type) { case GOT_OBJ_TYPE_COMMIT: @@ -1030,6 +1036,9 @@ load_commit_or_tag(int *ncommits, struct got_object_id goto done; break; } + } else if (tree_id == NULL) { + /* Blob which has already been marked as traversed. */ + continue; } if (tree_id) { @@ -1037,6 +1046,7 @@ load_commit_or_tag(int *ncommits, struct got_object_id repo, cancel_cb, cancel_arg); if (err) break; + tree_id = NULL; } if (commit || tag) @@ -1528,8 +1538,7 @@ got_repo_cleanup(struct got_repository *repo, } err = get_reflist_object_ids(&referenced_ids, &nreferenced, - (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG), - &refs, repo, cancel_cb, cancel_arg); + GOT_OBJ_TYPE_ANY, &refs, repo, cancel_cb, cancel_arg); if (err) goto done; blob - f4099794bb7ebc53f96a1addc9b3d92c43e8c93d blob + 8b953f1ee28370617e8dbb0865f36ea6604711e0 --- regress/cmdline/cleanup.sh +++ regress/cmdline/cleanup.sh @@ -459,10 +459,101 @@ test_cleanup_missing_pack_file() { fi test_done "$testroot" "$ret" } + +test_cleanup_non_commit_ref() { + local testroot=`test_init cleanup_non_commit_ref` + local commit_id=`git_show_head $testroot/repo` + + mkdir -p $testroot/t + echo foo > $testroot/t/foo + + foo_id=$(git -C $testroot/repo hash-object -t blob -w $testroot/t/foo) + + # verify that the blob object can be read + got cat -r $testroot/repo "$foo_id" > $testroot/stdout + cmp -s $testroot/t/foo $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/t/foo $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + # create a reference which points at the blob + got ref -r $testroot/repo -c $foo_id blobref + + # create a tree object + printf "10644 blob $foo_id\tfoo\n" > $testroot/tree-desc + tree_id=$(git -C $testroot/repo mktree < $testroot/tree-desc) + + # verify that the tree object can be read + got cat -r $testroot/repo "$tree_id" > $testroot/stdout + printf "$foo_id 0010644 foo\n" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + # create a reference which points at the tree + got ref -r $testroot/repo -c "$tree_id" treeref + + # list references + got ref -r $testroot/repo -l > $testroot/stdout + cat > $testroot/stdout.expected < $testroot/stdout \ + 2> $testroot/stderr + ret=$? + if [ $ret -ne 0 ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi + + # verify that the blob object can be read + got cat -r $testroot/repo "$foo_id" > $testroot/stdout + cmp -s $testroot/t/foo $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/t/foo $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + # verify that the tree object can be read + got cat -r $testroot/repo "$tree_id" > $testroot/stdout + printf "$foo_id 0010644 foo\n" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret=$? + if [ $ret -ne 0 ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + test_done "$testroot" "$ret" +} + test_parseargs "$@" run_test test_cleanup_unreferenced_loose_objects run_test test_cleanup_redundant_loose_objects run_test test_cleanup_redundant_pack_files run_test test_cleanup_precious_objects run_test test_cleanup_missing_pack_file +run_test test_cleanup_non_commit_ref