4 * Copyright (c) 2006 Junio C Hamano
11 #include "tree-walk.h"
15 #ifndef NO_EXTERNAL_GREP
17 #define NO_EXTERNAL_GREP 0
19 #define NO_EXTERNAL_GREP 1
23 static int builtin_grep;
/*
 * Configuration callback handed to git_config() by cmd_grep(); "cb" is the
 * struct grep_opt being filled in.
 *
 *   color.grep          -> opt->color, parsed by git_config_colorbool()
 *   color.grep.external -> opt->color_external, stored by git_config_string()
 *   color.grep.match    -> opt->color_match, parsed by color_parse()
 *
 * Every other key is passed on to git_color_default_config().
 *
 * NOTE(review): the test guarding config_error_nonbool() is not visible in
 * this excerpt; presumably it rejects a color.grep.match entry that has no
 * value -- confirm.
 */
25 static int grep_config(const char *var, const char *value, void *cb)
27 struct grep_opt *opt = cb;
29 if (!strcmp(var, "color.grep")) {
30 opt->color = git_config_colorbool(var, value, -1);
33 if (!strcmp(var, "color.grep.external"))
34 return git_config_string(&(opt->color_external), var, value);
35 if (!strcmp(var, "color.grep.match")) {
37 return config_error_nonbool(var);
38 color_parse(value, var, opt->color_match);
/* Not one of ours: let the generic color configuration handle it. */
41 return git_color_default_config(var, value, cb);
45 * git grep pathspecs are somewhat different from diff-tree pathspecs;
46 * pathname wildcards are allowed.
/*
 * Test "name" against every entry of the NULL-terminated "paths" array.
 *
 * Checks that are visible for each pathspec entry ("match"):
 *   - leading-path match: match is no longer than name, name starts with
 *     match, and match either ends in a slash or is followed in name by
 *     the end of the string or by a slash;
 *   - wildcard match through fnmatch(match, name, 0);
 *   - directory handling, used when name ends in a slash: "meta" marks the
 *     first wildcard character (star, open bracket or question mark) in
 *     match, or the end of match when it is fully literal, and match is
 *     compared with name over namelen bytes or over the literal prefix,
 *     whichever is shorter.
 *
 * An empty or NULL "paths" is tested for up front.
 *
 * NOTE(review): none of the return statements are visible in this excerpt;
 * presumably the function returns non-zero for "matches, or worth
 * descending into" and zero otherwise -- confirm.
 */
48 static int pathspec_matches(const char **paths, const char *name)
51 if (!paths || !*paths)
53 namelen = strlen(name);
54 for (i = 0; paths[i]; i++) {
55 const char *match = paths[i];
56 int matchlen = strlen(match);
57 const char *cp, *meta;
60 ((matchlen <= namelen) &&
61 !strncmp(name, match, matchlen) &&
62 (match[matchlen-1] == '/' ||
63 name[matchlen] == '\0' || name[matchlen] == '/')))
65 if (!fnmatch(match, name, 0))
67 if (name[namelen-1] != '/')
70 /* We are being asked if the directory ("name") is worth
73 * Find the longest leading directory name that does
74 * not have metacharacter in the pathspec; the name
75 * we are looking at must overlap with that directory.
77 for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
79 if (ch == '*' || ch == '[' || ch == '?') {
85 meta = cp; /* fully literal */
87 if (namelen <= meta - match) {
88 /* Looking at "Documentation/" and
89 * the pattern says "Documentation/howto/", or
90 * "Documentation/diff*.txt". The name we
91 * have should match prefix.
93 if (!memcmp(match, name, namelen))
98 if (meta - match < namelen) {
99 /* Looking at "Documentation/howto/" and
100 * the pattern says "Documentation/h*";
101 * match up to "Do.../h"; this avoids descending
102 * into "Documentation/technical/".
104 if (!memcmp(match, name, meta - match))
/*
 * Grep the contents of the object "sha1", reporting it under "name".
 *
 * The object is loaded with read_sha1_file(); a failure to read it is
 * reported through error().
 *
 * When opt->relative is set and opt->prefix_length is non-zero the name is
 * shortened by the prefix before grepping.  Two forms are visible: simply
 * advancing "name" past the prefix, and building a copy that keeps the
 * first tree_name_len bytes of name and drops the opt->prefix_length bytes
 * following them.  The copy goes into a static PATH_MAX buffer, or into
 * xmalloc()ed memory remembered in "to_free" when name_len does not fit.
 *
 * "hit" receives the result of grep_buffer() on the object data.
 *
 * NOTE(review): the condition choosing between the two forms (presumably
 * whether tree_name_len is zero), the release of data/to_free and the
 * return statements are not visible in this excerpt -- confirm.
 */
112 static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
116 enum object_type type;
117 char *to_free = NULL;
120 data = read_sha1_file(sha1, &type, &size);
122 error("'%s': unable to read %s", name, sha1_to_hex(sha1));
125 if (opt->relative && opt->prefix_length) {
126 static char name_buf[PATH_MAX];
128 int name_len = strlen(name) - opt->prefix_length + 1;
131 name += opt->prefix_length;
133 if (ARRAY_SIZE(name_buf) <= name_len)
134 cp = to_free = xmalloc(name_len);
137 memcpy(cp, name, tree_name_len);
138 strcpy(cp + tree_name_len,
139 name + tree_name_len + opt->prefix_length);
143 hit = grep_buffer(opt, name, data, size);
/*
 * Grep a file in the working tree.
 *
 * The file is examined with lstat(); a failure can be reported through
 * error() together with strerror(errno).  An empty file yields 0 (no hit),
 * and the mode is tested with S_ISREG().  Otherwise the file is opened
 * read-only and read in one go with read_in_full() into an xmalloc()ed
 * buffer of sz + 1 bytes; reading fewer bytes than st.st_size is reported
 * as a short read.
 *
 * When opt->relative is set and opt->prefix_length is non-zero, the prefix
 * is skipped in the file name passed to grep_buffer().
 *
 * NOTE(review): the handling of open() failure, of non-regular files, and
 * the cleanup/return statements are not visible in this excerpt -- confirm.
 */
149 static int grep_file(struct grep_opt *opt, const char *filename)
156 if (lstat(filename, &st) < 0) {
159 error("'%s': %s", filename, strerror(errno));
163 return 0; /* empty file -- no grep hit */
164 if (!S_ISREG(st.st_mode))
166 sz = xsize_t(st.st_size);
167 i = open(filename, O_RDONLY);
170 data = xmalloc(sz + 1);
171 if (st.st_size != read_in_full(i, data, sz)) {
172 error("'%s': short read %s", filename, strerror(errno));
178 if (opt->relative && opt->prefix_length)
179 filename += opt->prefix_length;
180 i = grep_buffer(opt, filename, data, sz);
185 #if !NO_EXTERNAL_GREP
/*
 * Run the external "grep" program with the given argument vector.
 *
 * "grep" is looked up and started with execvp(); the caller-side code then
 * waits for "pid" with waitpid(), retrying while waitpid() fails, and
 * inspects the result with WIFEXITED()/WEXITSTATUS().
 *
 * NOTE(review): the creation of the child process, the handling of
 * interrupted waits and the values returned for each exit status are not
 * visible in this excerpt -- confirm before relying on them.
 */
186 static int exec_grep(int argc, const char **argv)
196 execvp("grep", (char **) argv);
199 while (waitpid(pid, &status, 0) < 0) {
204 if (WIFEXITED(status)) {
205 if (!WEXITSTATUS(status))
/*
 * Append "a" to the argument vector under construction.  The macro uses
 * the variables "argv" and "nr" of the enclosing function and dies once
 * MAXARGS entries have been stored.
 */
214 #define push_arg(a) do { \
215 if (nr < MAXARGS) argv[nr++] = (a); \
216 else die("maximum number of args exceeded"); \
220 * If you send a singleton filename to grep, it does not give
221 * the name of the file. GNU grep has "-H" but we would want
222 * that behaviour in a portable way.
224 * So we keep two pathnames in argv buffer unsent to grep in
225 * the main loop if we need to do more than one grep.
/*
 * Hand the paths collected so far in "argv" to the external grep.
 *
 *   argc - number of argv slots currently in use
 *   arg0 - index of the first path in argv (count = argc - arg0 paths)
 *   argv - argument vector; option arguments come before arg0
 *   kept - non-NULL when called from the main loop while more paths may
 *          follow, NULL for the concluding call
 *
 * "/dev/null" may be appended as an extra path argument, and exec_grep()
 * does the actual run.  In the keeping case the path at argv[argc-2] is
 * saved in kept_0 first and both held-back paths are moved to argv[arg0]
 * and argv[arg0 + 1] afterwards.
 *
 * NOTE(review): several conditions, the update of *kept and the return
 * value are not visible in this excerpt -- confirm.
 */
227 static int flush_grep(struct grep_opt *opt,
228 int argc, int arg0, const char **argv, int *kept)
231 int count = argc - arg0;
232 const char *kept_0 = NULL;
236 * Because we keep at least 2 paths in the call from
237 * the main loop (i.e. kept != NULL), and MAXARGS is
238 * far greater than 2, this usually is a call to
239 * conclude the grep. However, the user could attempt
240 * to overflow the argv buffer by giving too many
241 * options to leave very small number of real
242 * arguments even for the call in the main loop.
245 die("insanely many options to grep");
248 * If we have two or more paths, we do not have to do
249 * anything special, but we need to push /dev/null to
250 * get "-H" behaviour of GNU grep portably but when we
251 * are not doing "-l" nor "-L" nor "-c".
255 !opt->unmatch_name_only &&
257 argv[argc++] = "/dev/null";
264 * Called because we found many paths and haven't finished
265 * iterating over the cache yet. We keep two paths
266 * for the concluding call. argv[argc-2] and argv[argc-1]
267 * has the last two paths, so save the first one away,
268 * replace it with NULL while sending the list to grep,
269 * and recover them after we are done.
272 kept_0 = argv[argc-2];
277 status = exec_grep(argc, argv);
281 * Then recover them. Now the last arg is beyond the
282 * terminating NULL which is at argc, and the second
283 * from the last is what we saved away in kept_0
285 argv[arg0++] = kept_0;
286 argv[arg0] = argv[argc+1];
/*
 * Append the parameters of a terminal color escape sequence to "sb".
 *
 * Each character of escape_seq is examined: 'm' is appended as ';', the
 * escape character (033) and '[' are dropped, everything else is copied
 * as is.  If anything was appended and the buffer now ends in ';', that
 * trailing ';' is removed again.
 *
 * NOTE(review): the statement advancing escape_seq inside the loop is not
 * visible in this excerpt -- confirm.
 */
291 static void grep_add_color(struct strbuf *sb, const char *escape_seq)
/* Remember the starting length so only our own output gets trimmed. */
293 size_t orig_len = sb->len;
295 while (*escape_seq) {
296 if (*escape_seq == 'm')
297 strbuf_addch(sb, ';');
298 else if (*escape_seq != '\033' && *escape_seq != '[')
299 strbuf_addch(sb, *escape_seq);
302 if (sb->len > orig_len && sb->buf[sb->len - 1] == ';')
303 strbuf_setlen(sb, sb->len - 1);
/*
 * Grep the working tree files listed in the index with the external
 * "grep" program.
 *
 * Steps visible in this excerpt:
 *   - a test for opt->extended, or opt->relative with a non-zero
 *     opt->prefix_length, is made up front;
 *   - settings in "opt" (regflags, binary, word_regexp, unmatch_name_only,
 *     null_following_name, pre/post context, ...) are examined to build
 *     the command line; context counts are formatted into "randarg" with
 *     snprintf(), dying if that buffer would overflow;
 *   - every pattern in opt->pattern_list is pushed;
 *   - GREP_COLOR and GREP_COLORS are set in the environment from
 *     opt->color_match, and a non-empty opt->color_external is pushed as
 *     an argument;
 *   - the index (active_cache) is walked; entries that are not regular
 *     files or do not match "paths" are skipped, a name starting with '-'
 *     is copied with "./" in front, and flush_grep() is called whenever
 *     MAXARGS arguments have accumulated;
 *   - consecutive index entries with the same name are stepped over;
 *   - a final flush_grep() with kept == NULL concludes the run.
 *
 * NOTE(review): the options actually pushed for each flag, the conditions
 * around the color setup and the return values are not visible in this
 * excerpt -- confirm.
 */
306 static int external_grep(struct grep_opt *opt, const char **paths, int cached)
308 int i, nr, argc, hit, len, status;
309 const char *argv[MAXARGS+1];
310 char randarg[ARGBUF];
311 char *argptr = randarg;
314 if (opt->extended || (opt->relative && opt->prefix_length))
324 if (opt->regflags & REG_EXTENDED)
326 if (opt->regflags & REG_ICASE)
328 if (opt->binary == GREP_BINARY_NOMATCH)
330 if (opt->word_regexp)
334 if (opt->unmatch_name_only)
336 if (opt->null_following_name)
337 /* in GNU grep git's "-z" translates to "-Z" */
341 if (opt->post_context || opt->pre_context) {
342 if (opt->post_context != opt->pre_context) {
343 if (opt->pre_context) {
345 len += snprintf(argptr, sizeof(randarg)-len,
346 "%u", opt->pre_context) + 1;
347 if (sizeof(randarg) <= len)
348 die("maximum length of args exceeded");
352 if (opt->post_context) {
354 len += snprintf(argptr, sizeof(randarg)-len,
355 "%u", opt->post_context) + 1;
356 if (sizeof(randarg) <= len)
357 die("maximum length of args exceeded");
364 len += snprintf(argptr, sizeof(randarg)-len,
365 "%u", opt->post_context) + 1;
366 if (sizeof(randarg) <= len)
367 die("maximum length of args exceeded");
372 for (p = opt->pattern_list; p; p = p->next) {
374 push_arg(p->pattern);
377 struct strbuf sb = STRBUF_INIT;
379 grep_add_color(&sb, opt->color_match);
380 setenv("GREP_COLOR", sb.buf, 1);
383 strbuf_addstr(&sb, "mt=");
384 grep_add_color(&sb, opt->color_match);
385 strbuf_addstr(&sb, ":sl=:cx=:fn=:ln=:bn=:se=");
386 setenv("GREP_COLORS", sb.buf, 1);
390 if (opt->color_external && strlen(opt->color_external) > 0)
391 push_arg(opt->color_external);
396 for (i = 0; i < active_nr; i++) {
397 struct cache_entry *ce = active_cache[i];
400 if (!S_ISREG(ce->ce_mode))
402 if (!pathspec_matches(paths, ce->name))
/* presumably so that grep does not take the name for an option -- confirm */
405 if (name[0] == '-') {
406 int len = ce_namelen(ce);
407 name = xmalloc(len + 3);
408 memcpy(name, "./", 2);
409 memcpy(name + 2, ce->name, len + 1);
412 if (MAXARGS <= argc) {
413 status = flush_grep(opt, argc, nr, argv, &kept);
421 } while (i < active_nr &&
422 !strcmp(ce->name, active_cache[i]->name));
423 i--; /* compensate for loop control */
427 status = flush_grep(opt, argc, nr, argv, NULL);
/*
 * Grep the paths recorded in the index.
 *
 * Unless external grep support is compiled out, external_grep() is tried
 * first when "cached" is zero and the file-scope flag builtin_grep is not
 * set.
 *
 * The built-in path walks active_cache: entries that are not regular files
 * or that fail pathspec_matches() are skipped.  When "cached" is set, or
 * the entry carries CE_VALID, grep_sha1() is used on the entry's object
 * ID; grep_file() is used on the working tree file by the same name
 * otherwise.  Consecutive index entries sharing one name are stepped over
 * so the path is handled once.  The compiled patterns are released with
 * free_grep_patterns() before returning.
 *
 * NOTE(review): how the result of external_grep() is interpreted and the
 * return statements are not visible in this excerpt -- confirm.
 */
435 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
441 #if !NO_EXTERNAL_GREP
443 * Use the external "grep" command for the case where
444 * we grep through the checked-out files. It tends to
445 * be a lot more optimized
447 if (!cached && !builtin_grep) {
448 hit = external_grep(opt, paths, cached);
455 for (nr = 0; nr < active_nr; nr++) {
456 struct cache_entry *ce = active_cache[nr];
457 if (!S_ISREG(ce->ce_mode))
459 if (!pathspec_matches(paths, ce->name))
462 * If CE_VALID is on, we assume worktree file and its cache entry
463 * are identical, even if worktree file has been modified, so use
464 * cache version instead
466 if (cached || (ce->ce_flags & CE_VALID)) {
469 hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
472 hit |= grep_file(opt, ce->name);
476 } while (nr < active_nr &&
477 !strcmp(ce->name, active_cache[nr]->name));
478 nr--; /* compensate for loop control */
481 free_grep_patterns(opt);
/*
 * Grep every matching blob reachable from "tree", recursing into subtrees.
 *
 *   tree_name - name of the tree being searched; it can be put in front of
 *               each path, followed by a colon
 *   base      - path of "tree" relative to the top, appended to the path
 *               buffer before any entry name
 *
 * The path of each entry is assembled in a strbuf.  tn_len, once updated
 * from pathbuf.len after the colon is added, is the length of the
 * "tree_name:" part, so "down" (pathbuf.buf + tn_len) is the path without
 * it; that is what gets tested with pathspec_matches() and what becomes
 * "base" for the recursive call.  Directory entries get a trailing slash
 * appended before the test.  Regular files go to grep_sha1() with tn_len
 * as tree_name_len; directories are read with read_sha1_file(), dying if
 * that fails, and walked recursively.
 *
 * NOTE(review): the condition under which the "tree_name:" prefix is
 * added, the resetting of the path buffer between entries, the release of
 * subtree data and the return statement are not visible in this excerpt
 * -- confirm.
 */
485 static int grep_tree(struct grep_opt *opt, const char **paths,
486 struct tree_desc *tree,
487 const char *tree_name, const char *base)
491 struct name_entry entry;
493 int tn_len = strlen(tree_name);
494 struct strbuf pathbuf;
496 strbuf_init(&pathbuf, PATH_MAX + tn_len);
499 strbuf_add(&pathbuf, tree_name, tn_len);
500 strbuf_addch(&pathbuf, ':');
501 tn_len = pathbuf.len;
503 strbuf_addstr(&pathbuf, base);
506 while (tree_entry(tree, &entry)) {
507 int te_len = tree_entry_len(entry.path, entry.sha1);
509 strbuf_add(&pathbuf, entry.path, te_len);
511 if (S_ISDIR(entry.mode))
512 /* Match "abc/" against pathspec to
513 * decide if we want to descend into "abc"
516 strbuf_addch(&pathbuf, '/');
518 down = pathbuf.buf + tn_len;
519 if (!pathspec_matches(paths, down))
521 else if (S_ISREG(entry.mode))
522 hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
523 else if (S_ISDIR(entry.mode)) {
524 enum object_type type;
525 struct tree_desc sub;
529 data = read_sha1_file(entry.sha1, &type, &size);
531 die("unable to read tree (%s)",
532 sha1_to_hex(entry.sha1));
533 init_tree_desc(&sub, data, size);
534 hit |= grep_tree(opt, paths, &sub, tree_name, down);
538 strbuf_release(&pathbuf);
/*
 * Grep a single object named on the command line.
 *
 * A blob is searched directly with grep_sha1(), using "name" as its label
 * and no tree-name prefix.  A commit or a tree is resolved to tree data
 * with read_object_with_reference() and searched with grep_tree(), with
 * "name" as the tree name and an empty base path; failing to read the
 * tree is fatal.  Any other object type is fatal as well.
 *
 * NOTE(review): the release of the tree data and the return of "hit" are
 * not visible in this excerpt -- confirm.
 */
542 static int grep_object(struct grep_opt *opt, const char **paths,
543 struct object *obj, const char *name)
545 if (obj->type == OBJ_BLOB)
546 return grep_sha1(opt, obj->sha1, name, 0);
547 if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
548 struct tree_desc tree;
552 data = read_object_with_reference(obj->sha1, tree_type,
555 die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
556 init_tree_desc(&tree, data, size);
557 hit = grep_tree(opt, paths, &tree, name, "");
561 die("unable to grep from object of type %s", typename(obj->type));
/*
 * Usage line passed to usage(), followed by the message templates that
 * cmd_grep() passes to die().
 */
564 static const char builtin_grep_usage[] =
565 "git grep <option>* [-e] <pattern> <rev>* [[--] <path>...]";
/* Takes the offending context-length text as its %s argument. */
567 static const char emsg_invalid_context_len[] =
568 "%s: invalid context length argument";
569 static const char emsg_missing_context_len[] =
570 "missing context length argument";
/*
 * Takes the option as its %s argument.
 * NOTE(review): cmd_grep() passes the whole option including its leading
 * dash, so the message shows a doubled dash -- confirm whether intended.
 */
571 static const char emsg_missing_argument[] =
572 "option requires an argument -%s";
/*
 * Entry point of "git grep".
 *
 *   argc, argv - command line; options are examined at argv[1]
 *   prefix     - path prefix used for pathspecs and relative names; its
 *                length is stored in opt.prefix_length (0 when NULL or
 *                empty)
 *
 * Visible phases:
 *   1. "opt" is zeroed and given defaults (pattern list tail, REG_NEWLINE,
 *      red/bold match color); grep_config() is run through git_config().
 *   2. Options are parsed by string comparison.  Patterns come from -e,
 *      from the lines of a -f file (newline stripped, empty lines
 *      ignored), from the --not / --and / ( / ) operators, or from the
 *      first unrecognized non-option word when no pattern was given yet.
 *   3. The command dies when there is no pattern, or when
 *      --fixed-strings is combined with any regexp flag other than
 *      REG_NEWLINE; otherwise the patterns are compiled.
 *   4. Remaining words are first tried as revisions (get_sha1() and
 *      parse_object()) and collected in "list"; "--" is recognized.  The
 *      words after that are paths, checked with verify_filename() when no
 *      "--" was seen and turned into pathspecs with get_pathspec().  With
 *      a prefix and relative output, every pathspec must start with the
 *      prefix.
 *   5. The index/working tree is searched with grep_cache(), whose
 *      negated result is returned; the collected objects (tags peeled
 *      with deref_tag()) are searched with grep_object().  Combining
 *      --cached with trees is an error.
 *
 * NOTE(review): most assignments made by the individual options, the
 * conditions selecting between the searches in phase 5 and the final
 * return are not visible in this excerpt -- confirm.
 */
574 int cmd_grep(int argc, const char **argv, const char *prefix)
578 int seen_dashdash = 0;
580 struct object_array list = { 0, 0, NULL };
581 const char **paths = NULL;
584 memset(&opt, 0, sizeof(opt));
585 opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
588 opt.pattern_tail = &opt.pattern_list;
589 opt.regflags = REG_NEWLINE;
591 strcpy(opt.color_match, GIT_COLOR_RED GIT_COLOR_BOLD);
593 git_config(grep_config, &opt);
595 opt.color = git_use_color_default;
598 * If there is no -- then the paths must exist in the working
599 * tree. If there is no explicit pattern specified with -e or
600 * -f, we take the first unrecognized non option to be the
601 * pattern, but then what follows it must be zero or more
602 * valid refs up to the -- (if exists), and then existing
603 * paths. If there is an explicit pattern, then the first
604 * unrecognized non option is the beginning of the refs list
605 * that continues up to the -- (if exists), and then paths.
609 const char *arg = argv[1];
611 if (!strcmp("--cached", arg)) {
615 if (!strcmp("--no-ext-grep", arg)) {
619 if (!strcmp("-a", arg) ||
620 !strcmp("--text", arg)) {
621 opt.binary = GREP_BINARY_TEXT;
624 if (!strcmp("-i", arg) ||
625 !strcmp("--ignore-case", arg)) {
626 opt.regflags |= REG_ICASE;
629 if (!strcmp("-I", arg)) {
630 opt.binary = GREP_BINARY_NOMATCH;
633 if (!strcmp("-v", arg) ||
634 !strcmp("--invert-match", arg)) {
638 if (!strcmp("-E", arg) ||
639 !strcmp("--extended-regexp", arg)) {
640 opt.regflags |= REG_EXTENDED;
643 if (!strcmp("-F", arg) ||
644 !strcmp("--fixed-strings", arg)) {
648 if (!strcmp("-G", arg) ||
649 !strcmp("--basic-regexp", arg)) {
650 opt.regflags &= ~REG_EXTENDED;
653 if (!strcmp("-n", arg)) {
657 if (!strcmp("-h", arg)) {
661 if (!strcmp("-H", arg)) {
665 if (!strcmp("-l", arg) ||
666 !strcmp("--name-only", arg) ||
667 !strcmp("--files-with-matches", arg)) {
671 if (!strcmp("-L", arg) ||
672 !strcmp("--files-without-match", arg)) {
673 opt.unmatch_name_only = 1;
676 if (!strcmp("-z", arg) ||
677 !strcmp("--null", arg)) {
678 opt.null_following_name = 1;
681 if (!strcmp("-c", arg) ||
682 !strcmp("--count", arg)) {
686 if (!strcmp("-w", arg) ||
687 !strcmp("--word-regexp", arg)) {
691 if (!prefixcmp(arg, "-A") ||
692 !prefixcmp(arg, "-B") ||
693 !prefixcmp(arg, "-C") ||
694 (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
698 case 'A': case 'B': case 'C':
701 die(emsg_missing_context_len);
712 if (strtoul_ui(scan, 10, &num))
713 die(emsg_invalid_context_len, scan);
716 opt.post_context = num;
720 opt.post_context = num;
722 opt.pre_context = num;
727 if (!strcmp("-f", arg)) {
732 die(emsg_missing_argument, arg);
733 patterns = fopen(argv[1], "r");
735 die("'%s': %s", argv[1], strerror(errno));
736 while (fgets(buf, sizeof(buf), patterns)) {
737 int len = strlen(buf);
738 if (len && buf[len-1] == '\n')
740 /* ignore empty line like grep does */
743 append_grep_pattern(&opt, xstrdup(buf),
752 if (!strcmp("--not", arg)) {
753 append_grep_pattern(&opt, arg, "command line", 0,
757 if (!strcmp("--and", arg)) {
758 append_grep_pattern(&opt, arg, "command line", 0,
762 if (!strcmp("--or", arg))
763 continue; /* no-op */
764 if (!strcmp("(", arg)) {
765 append_grep_pattern(&opt, arg, "command line", 0,
769 if (!strcmp(")", arg)) {
770 append_grep_pattern(&opt, arg, "command line", 0,
774 if (!strcmp("--all-match", arg)) {
778 if (!strcmp("-e", arg)) {
780 append_grep_pattern(&opt, argv[1],
787 die(emsg_missing_argument, arg);
789 if (!strcmp("--full-name", arg)) {
793 if (!strcmp("--color", arg)) {
797 if (!strcmp("--no-color", arg)) {
801 if (!strcmp("--", arg)) {
802 /* later processing wants to have this at argv[1] */
808 usage(builtin_grep_usage);
810 /* First unrecognized non-option token */
811 if (!opt.pattern_list) {
812 append_grep_pattern(&opt, arg, "command line", 0,
817 /* We are looking at the first path or rev;
818 * it is found at argv[1] after leaving the
826 if (opt.color && !opt.color_external)
828 if (!opt.pattern_list)
829 die("no pattern given.");
830 if ((opt.regflags != REG_NEWLINE) && opt.fixed)
831 die("cannot mix --fixed-strings and regexp");
832 compile_grep_patterns(&opt);
834 /* Check revs and then paths */
835 for (i = 1; i < argc; i++) {
836 const char *arg = argv[i];
837 unsigned char sha1[20];
839 if (!get_sha1(arg, sha1)) {
840 struct object *object = parse_object(sha1);
842 die("bad object %s", arg);
843 add_object_array(object, arg, &list);
846 if (!strcmp(arg, "--")) {
853 /* The rest are paths */
854 if (!seen_dashdash) {
856 for (j = i; j < argc; j++)
857 verify_filename(prefix, argv[j]);
861 paths = get_pathspec(prefix, argv + i);
862 if (opt.prefix_length && opt.relative) {
863 /* Make sure we do not get outside of paths */
864 for (i = 0; paths[i]; i++)
865 if (strncmp(prefix, paths[i], opt.prefix_length))
866 die("git grep: cannot generate relative filenames containing '..'");
870 paths = xcalloc(2, sizeof(const char *));
878 return !grep_cache(&opt, paths, cached);
882 die("both --cached and trees are given.");
884 for (i = 0; i < list.nr; i++) {
885 struct object *real_obj;
886 real_obj = deref_tag(list.objects[i].item, NULL, 0);
887 if (grep_object(&opt, paths, real_obj, list.objects[i].name))
890 free_grep_patterns(&opt);