Skip to content

Commit 6a95193

Browse files
peff authored and gitster committed
cat-file: add --batch-all-objects option
It can sometimes be useful to examine all objects in the repository. Normally this is done with "git rev-list --all --objects", but:

  1. That shows only reachable objects. You may want to look at all available objects.

  2. It's slow. We actually open each object to walk the graph. If your operation is OK with seeing unreachable objects, it's an order of magnitude faster to just enumerate the loose directories and pack indices.

You can do this yourself using "ls" and "git show-index", but it's non-obvious. This patch adds an option to "cat-file --batch-check" to operate on all available objects (rather than reading names from stdin).

This is based on a proposal by Charles Bailey to provide a separate "git list-all-objects" command. That is more orthogonal, as it splits enumerating the objects from getting information about them. However, in practice you will either:

  a. Feed the list of objects directly into cat-file anyway, so you can find out information about them. Keeping it in a single process is more efficient.

  b. Ask the listing process to start telling you more information about the objects, in which case you will reinvent cat-file's batch-check formatter.

Adding a cat-file option is simple and efficient. And if you really do want just the object names, you can always do:

  git cat-file --batch-check='%(objectname)' --batch-all-objects

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 44b877e commit 6a95193

File tree

3 files changed

+77
-2
lines changed

3 files changed

+77
-2
lines changed

Documentation/git-cat-file.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ OPTIONS
6969
not be combined with any other options or arguments. See the
7070
section `BATCH OUTPUT` below for details.
7171

72+
--batch-all-objects::
73+
Instead of reading a list of objects on stdin, perform the
74+
requested batch operation on all objects in the repository and
75+
any alternate object stores (not just reachable objects).
76+
Requires `--batch` or `--batch-check` be specified. Note that
77+
the order of the objects is unspecified, and there may be
78+
duplicate entries.
79+
7280
--buffer::
7381
Normally batch output is flushed after each object is output, so
7482
that a process can interactively read and write from

builtin/cat-file.c

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct batch_options {
1515
int follow_symlinks;
1616
int print_contents;
1717
int buffer_output;
18+
int all_objects;
1819
const char *format;
1920
};
2021

@@ -257,7 +258,7 @@ static void batch_object_write(const char *obj_name, struct batch_options *opt,
257258
struct strbuf buf = STRBUF_INIT;
258259

259260
if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
260-
printf("%s missing\n", obj_name);
261+
printf("%s missing\n", obj_name ? obj_name : sha1_to_hex(data->sha1));
261262
fflush(stdout);
262263
return;
263264
}
@@ -318,6 +319,34 @@ static void batch_one_object(const char *obj_name, struct batch_options *opt,
318319
batch_object_write(obj_name, opt, data);
319320
}
320321

322+
struct object_cb_data {
323+
struct batch_options *opt;
324+
struct expand_data *expand;
325+
};
326+
327+
static int batch_object_cb(const unsigned char *sha1,
328+
struct object_cb_data *data)
329+
{
330+
hashcpy(data->expand->sha1, sha1);
331+
batch_object_write(NULL, data->opt, data->expand);
332+
return 0;
333+
}
334+
335+
static int batch_loose_object(const unsigned char *sha1,
336+
const char *path,
337+
void *data)
338+
{
339+
return batch_object_cb(sha1, data);
340+
}
341+
342+
static int batch_packed_object(const unsigned char *sha1,
343+
struct packed_git *pack,
344+
uint32_t pos,
345+
void *data)
346+
{
347+
return batch_object_cb(sha1, data);
348+
}
349+
321350
static int batch_objects(struct batch_options *opt)
322351
{
323352
struct strbuf buf = STRBUF_INIT;
@@ -345,6 +374,15 @@ static int batch_objects(struct batch_options *opt)
345374
if (opt->print_contents)
346375
data.info.typep = &data.type;
347376

377+
if (opt->all_objects) {
378+
struct object_cb_data cb;
379+
cb.opt = opt;
380+
cb.expand = &data;
381+
for_each_loose_object(batch_loose_object, &cb, 0);
382+
for_each_packed_object(batch_packed_object, &cb, 0);
383+
return 0;
384+
}
385+
348386
/*
349387
* We are going to call get_sha1 on a potentially very large number of
350388
* objects. In most large cases, these will be actual object sha1s. The
@@ -436,6 +474,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
436474
PARSE_OPT_OPTARG, batch_option_callback },
437475
OPT_BOOL(0, "follow-symlinks", &batch.follow_symlinks,
438476
N_("follow in-tree symlinks (used with --batch or --batch-check)")),
477+
OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
478+
N_("show all objects with --batch or --batch-check")),
439479
OPT_END()
440480
};
441481

@@ -460,7 +500,7 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
460500
usage_with_options(cat_file_usage, options);
461501
}
462502

463-
if (batch.follow_symlinks && !batch.enabled) {
503+
if ((batch.follow_symlinks || batch.all_objects) && !batch.enabled) {
464504
usage_with_options(cat_file_usage, options);
465505
}
466506

t/t1006-cat-file.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,4 +547,31 @@ test_expect_success 'git cat-file --batch --follow-symlink returns correct sha a
547547
test_cmp expect actual
548548
'
549549

550+
test_expect_success 'cat-file --batch-all-objects shows all objects' '
551+
# make new repos so we know the full set of objects; we will
552+
# also make sure that there are some packed and some loose
553+
# objects, some referenced and some not, and that there are
554+
# some available only via alternates.
555+
git init all-one &&
556+
(
557+
cd all-one &&
558+
echo content >file &&
559+
git add file &&
560+
git commit -qm base &&
561+
git rev-parse HEAD HEAD^{tree} HEAD:file &&
562+
git repack -ad &&
563+
echo not-cloned | git hash-object -w --stdin
564+
) >expect.unsorted &&
565+
git clone -s all-one all-two &&
566+
(
567+
cd all-two &&
568+
echo local-unref | git hash-object -w --stdin
569+
) >>expect.unsorted &&
570+
sort <expect.unsorted >expect &&
571+
git -C all-two cat-file --batch-all-objects \
572+
--batch-check="%(objectname)" >actual.unsorted &&
573+
sort <actual.unsorted >actual &&
574+
test_cmp expect actual
575+
'
576+
550577
test_done

0 commit comments

Comments
 (0)