2 * Copyright (C) 2005 Junio C Hamano
12 static const char *diff_opts = "-pu";
13 static unsigned char null_sha1[20] = { 0, };
14 #define MAX_SCORE 10000
15 #define DEFAULT_MINIMUM_SCORE 5000
17 static const char *external_diff(void)
19 static const char *external_diff_cmd = NULL;
20 static int done_preparing = 0;
23 return external_diff_cmd;
26 * Default values above are meant to match the
27 * Linux kernel development style. Examples of
28 * alternative styles you can specify via environment
33 if (gitenv("GIT_EXTERNAL_DIFF"))
34 external_diff_cmd = gitenv("GIT_EXTERNAL_DIFF");
36 /* In case external diff fails... */
37 diff_opts = gitenv("GIT_DIFF_OPTS") ? : diff_opts;
40 return external_diff_cmd;
43 /* Help to copy the thing properly quoted for the shell safety.
44 * any single quote is replaced with '\'', and the caller is
45 * expected to enclose the result within a single quote pair.
48 * original sq_expand result
49 * name ==> name ==> 'name'
50 * a b ==> a b ==> 'a b'
51 * a'b ==> a'\''b ==> 'a'\''b'
53 static char *sq_expand(const char *src)
55 static char *buf = NULL;
60 /* count bytes needed to store the quoted string. */
61 for (cnt = 1, cp = src; *cp; cnt++, cp++)
67 while ((c = *src++)) {
71 bp = strcpy(bp, "'\\''");
79 static struct diff_tempfile {
87 unsigned char blob_sha1[20];
88 unsigned short mode; /* file mode */
89 unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
90 * if false, use the name and read from
93 unsigned file_valid : 1; /* if false the file does not exist */
96 static void builtin_diff(const char *name_a,
98 struct diff_tempfile *temp,
101 int i, next_at, cmd_size;
102 const char *diff_cmd = "diff -L'%s%s' -L'%s%s'";
103 const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */
104 const char *input_name_sq[2];
105 const char *path0[2];
106 const char *path1[2];
107 const char *name_sq[2];
110 name_sq[0] = sq_expand(name_a);
111 name_sq[1] = sq_expand(name_b);
113 /* diff_cmd and diff_arg have 6 %s in total which makes
114 * the sum of these strings 12 bytes larger than required.
115 * we use 2 spaces around diff-opts, and we need to count
116 * terminating NUL, so we subtract 9 here.
118 cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
119 strlen(diff_arg) - 9);
120 for (i = 0; i < 2; i++) {
121 input_name_sq[i] = sq_expand(temp[i].name);
122 if (!strcmp(temp[i].name, "/dev/null")) {
123 path0[i] = "/dev/null";
126 path0[i] = i ? "b/" : "a/";
127 path1[i] = name_sq[i];
129 cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
130 strlen(input_name_sq[i]));
133 cmd = xmalloc(cmd_size);
136 next_at += snprintf(cmd+next_at, cmd_size-next_at,
138 path0[0], path1[0], path0[1], path1[1]);
139 next_at += snprintf(cmd+next_at, cmd_size-next_at,
141 next_at += snprintf(cmd+next_at, cmd_size-next_at,
142 diff_arg, input_name_sq[0], input_name_sq[1]);
144 printf("diff --git a/%s b/%s\n", name_a, name_b);
146 printf("new file mode %s\n", temp[1].mode);
147 else if (!path1[1][0])
148 printf("deleted file mode %s\n", temp[0].mode);
150 if (strcmp(temp[0].mode, temp[1].mode)) {
151 printf("old mode %s\n", temp[0].mode);
152 printf("new mode %s\n", temp[1].mode);
154 if (strcmp(name_a, name_b)) {
155 if (0 < rename_score)
156 printf("rename similarity index %d%%\n",
158 rename_score*100.0/MAX_SCORE));
159 printf("rename old %s\n", name_a);
160 printf("rename new %s\n", name_b);
162 if (strncmp(temp[0].mode, temp[1].mode, 3))
163 /* we do not run diff between different kind
169 execlp("/bin/sh","sh", "-c", cmd, NULL);
173 * Given a name and sha1 pair, if the dircache tells us the file in
174 * the work tree has that object contents, return true, so that
175 * prepare_temp_file() does not have to inflate and extract.
177 static int work_tree_matches(const char *name, const unsigned char *sha1)
179 struct cache_entry *ce;
183 /* We do not read the cache ourselves here, because the
184 * benchmark with my previous version that always reads cache
185 * shows that it makes things worse for diff-tree comparing
186 * two linux-2.6 kernel trees in an already checked out work
187 * tree. This is because most diff-tree comparisons deal with
188 * only a small number of files, while reading the cache is
189 * expensive for a large project, and its cost outweighs the
190 * savings we get by not inflating the object to a temporary
191 * file. Practically, this code only helps when we are used
192 * by diff-cache --cached, which does read the cache before
199 pos = cache_name_pos(name, len);
202 ce = active_cache[pos];
203 if ((lstat(name, &st) < 0) ||
204 !S_ISREG(st.st_mode) ||
205 ce_match_stat(ce, &st) ||
206 memcmp(sha1, ce->sha1, 20))
211 static void prep_temp_blob(struct diff_tempfile *temp,
219 strcpy(temp->tmp_path, ".diff_XXXXXX");
220 fd = mkstemp(temp->tmp_path);
222 die("unable to create temp-file");
223 if (write(fd, blob, size) != size)
224 die("unable to write temp-file");
226 temp->name = temp->tmp_path;
227 strcpy(temp->hex, sha1_to_hex(sha1));
229 sprintf(temp->mode, "%06o", mode);
232 static void prepare_temp_file(const char *name,
233 struct diff_tempfile *temp,
234 struct diff_spec *one)
236 if (!one->file_valid) {
238 /* A '-' entry produces this for file-2, and
239 * a '+' entry produces this for file-1.
241 temp->name = "/dev/null";
242 strcpy(temp->hex, ".");
243 strcpy(temp->mode, ".");
247 if (!one->sha1_valid ||
248 work_tree_matches(name, one->blob_sha1)) {
251 if (lstat(temp->name, &st) < 0) {
253 goto not_a_valid_file;
254 die("stat(%s): %s", temp->name, strerror(errno));
256 if (S_ISLNK(st.st_mode)) {
258 char *buf, buf_[1024];
259 buf = ((sizeof(buf_) < st.st_size) ?
260 xmalloc(st.st_size) : buf_);
261 ret = readlink(name, buf, st.st_size);
263 die("readlink(%s)", name);
264 prep_temp_blob(temp, buf, st.st_size,
266 one->blob_sha1 : null_sha1),
268 one->mode : S_IFLNK));
271 if (!one->sha1_valid)
272 strcpy(temp->hex, sha1_to_hex(null_sha1));
274 strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
275 sprintf(temp->mode, "%06o",
276 S_IFREG |ce_permissions(st.st_mode));
285 blob = read_sha1_file(one->blob_sha1, type, &size);
286 if (!blob || strcmp(type, "blob"))
287 die("unable to read blob object for %s (%s)",
288 name, sha1_to_hex(one->blob_sha1));
289 prep_temp_blob(temp, blob, size, one->blob_sha1, one->mode);
294 static void remove_tempfile(void)
298 for (i = 0; i < 2; i++)
299 if (diff_temp[i].name == diff_temp[i].tmp_path) {
300 unlink(diff_temp[i].name);
301 diff_temp[i].name = NULL;
305 static void remove_tempfile_on_signal(int signo)
310 static int detect_rename;
311 static int reverse_diff;
312 static int diff_raw_output = -1;
313 static const char **pathspec;
315 static int minimum_score;
317 static int matches_pathspec(const char *name)
325 namelen = strlen(name);
326 for (i = 0; i < speccnt; i++) {
327 int speclen = strlen(pathspec[i]);
328 if (! strncmp(pathspec[i], name, speclen) &&
329 speclen <= namelen &&
330 (name[speclen] == 0 || name[speclen] == '/'))
336 /* An external diff command takes:
338 * diff-cmd name infile1 infile1-sha1 infile1-mode \
339 * infile2 infile2-sha1 infile2-mode [ rename-to ]
342 static void run_external_diff(const char *name,
344 struct diff_spec *one,
345 struct diff_spec *two,
348 struct diff_tempfile *temp = diff_temp;
351 static int atexit_asked = 0;
354 struct diff_spec *tmp_spec;
355 tmp_spec = one; one = two; two = tmp_spec;
358 tmp = name; name = other; other = tmp;
362 if (!matches_pathspec(name) && (!other || !matches_pathspec(other)))
366 prepare_temp_file(name, &temp[0], one);
367 prepare_temp_file(other ? : name, &temp[1], two);
368 if (! atexit_asked &&
369 (temp[0].name == temp[0].tmp_path ||
370 temp[1].name == temp[1].tmp_path)) {
372 atexit(remove_tempfile);
374 signal(SIGINT, remove_tempfile_on_signal);
380 die("unable to fork");
382 const char *pgm = external_diff();
385 const char *exec_arg[9];
386 const char **arg = &exec_arg[0];
389 *arg++ = temp[0].name;
390 *arg++ = temp[0].hex;
391 *arg++ = temp[0].mode;
392 *arg++ = temp[1].name;
393 *arg++ = temp[1].hex;
394 *arg++ = temp[1].mode;
398 execvp(pgm, (char *const*) exec_arg);
401 execlp(pgm, pgm, name, NULL);
404 * otherwise we use the built-in one.
407 builtin_diff(name, other ? : name, temp, rename_score);
409 printf("* Unmerged path %s\n", name);
412 if (waitpid(pid, &status, 0) < 0 ||
413 !WIFEXITED(status) || WEXITSTATUS(status)) {
414 /* Earlier we did not check the exit status because
415 * diff exits non-zero if files are different, and
416 * we are not interested in knowing that. It was a
417 * mistake which made it harder to quit a diff-*
418 * session that uses the git-apply-patch-script as
419 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
420 * should also exit non-zero only when it wants to
421 * abort the entire diff-* session.
424 fprintf(stderr, "external diff died, stopping at %s.\n", name);
431 * We do not detect circular renames. Just hold created and deleted
432 * entries and later attempt to match them up. If they do not match,
433 * then spit them out as deletes or creates as original.
436 static struct diff_spec_hold {
437 struct diff_spec_hold *next;
442 #define SHOULD_FREE 2
443 #define SHOULD_MUNMAP 4
446 } *createdfile, *deletedfile;
448 static void hold_diff(const char *name,
449 struct diff_spec *one,
450 struct diff_spec *two)
452 struct diff_spec_hold **list, *elem;
454 if (one->file_valid && two->file_valid)
455 die("internal error");
457 if (!detect_rename) {
458 run_external_diff(name, NULL, one, two, -1);
461 elem = xmalloc(sizeof(*elem) + strlen(name));
462 strcpy(elem->path, name);
466 if (one->file_valid) {
478 static int populate_data(struct diff_spec_hold *s)
484 if (s->it.sha1_valid) {
485 s->data = read_sha1_file(s->it.blob_sha1, type, &s->size);
486 s->flags |= SHOULD_FREE;
491 fd = open(s->path, O_RDONLY);
494 if (fstat(fd, &st)) {
498 s->size = st.st_size;
499 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
504 s->flags |= SHOULD_MUNMAP;
509 static void free_data(struct diff_spec_hold *s)
511 if (s->flags & SHOULD_FREE)
513 else if (s->flags & SHOULD_MUNMAP)
514 munmap(s->data, s->size);
515 s->flags &= ~(SHOULD_FREE|SHOULD_MUNMAP);
519 static void flush_remaining_diff(struct diff_spec_hold *elem,
522 static struct diff_spec null_file_spec;
524 null_file_spec.file_valid = 0;
525 for ( ; elem ; elem = elem->next) {
527 if (elem->flags & MATCHED)
530 run_external_diff(elem->path, NULL,
531 &null_file_spec, &elem->it, -1);
533 run_external_diff(elem->path, NULL,
534 &elem->it, &null_file_spec, -1);
538 static int is_exact_match(struct diff_spec_hold *src,
539 struct diff_spec_hold *dst)
541 if (src->it.sha1_valid && dst->it.sha1_valid &&
542 !memcmp(src->it.blob_sha1, dst->it.blob_sha1, 20))
544 if (populate_data(src) || populate_data(dst))
545 /* this is an error but will be caught downstream */
547 if (src->size == dst->size &&
548 !memcmp(src->data, dst->data, src->size))
553 int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
555 /* src points at a deleted file and dst points at a created
556 * file. They may be quite similar, in which case we want to
557 * say src is renamed to dst.
559 * Compare them and return how similar they are, representing
560 * the score as an integer between 0 and 10000, except
561 * where they match exactly it is considered better than anything
565 unsigned long delta_size;
568 delta_size = ((src->size < dst->size) ?
569 (dst->size - src->size) : (src->size - dst->size));
571 /* We would not consider rename followed by more than
572 * minimum_score/MAX_SCORE edits; that is, delta_size must be smaller
573 * than (src->size + dst->size)/2 * minimum_score/MAX_SCORE,
577 if ((src->size+dst->size)*minimum_score < delta_size*MAX_SCORE*2)
580 delta = diff_delta(src->data, src->size,
581 dst->data, dst->size,
585 /* This "delta" is really xdiff with adler32 and all the
586 * overheads but it is a quick and dirty approximation.
588 * Now we will give some score to it. 100% edit gets
589 * 0 points and 0% edit gets MAX_SCORE points. That is, every
590 * 1/MAX_SCORE edit gets 1 point penalty. The amount of penalty is:
592 * (delta_size * 2 / (src->size + dst->size)) * MAX_SCORE
595 score = MAX_SCORE-(MAX_SCORE*2*delta_size/(src->size+dst->size));
596 if (score < 0) return 0;
597 if (MAX_SCORE < score) return MAX_SCORE;
602 struct diff_spec_hold *src;
603 struct diff_spec_hold *dst;
607 static int score_compare(const void *a_, const void *b_)
609 const struct diff_score *a = a_, *b = b_;
610 return b->score - a->score;
613 static void flush_rename_pair(struct diff_spec_hold *src,
614 struct diff_spec_hold *dst,
617 src->flags |= MATCHED;
618 dst->flags |= MATCHED;
621 run_external_diff(src->path, dst->path,
622 &src->it, &dst->it, rename_score);
625 static void free_held_diff(struct diff_spec_hold *list)
627 struct diff_spec_hold *h;
628 for (h = list; list; list = h) {
635 void diff_flush(void)
637 int num_create, num_delete, c, d;
638 struct diff_spec_hold *elem, *src, *dst;
639 struct diff_score *mx;
641 /* We really want to cull the candidates list early
642 * with cheap tests in order to avoid doing deltas.
644 * With the current callers, we should not have already
645 * matched entries at this point, but it is nonetheless
646 * checked for sanity.
648 for (dst = createdfile; dst; dst = dst->next) {
649 if (dst->flags & MATCHED)
651 for (src = deletedfile; src; src = src->next) {
652 if (src->flags & MATCHED)
654 if (! is_exact_match(src, dst))
656 flush_rename_pair(src, dst, MAX_SCORE);
661 /* Count surviving candidates */
662 for (num_create = 0, elem = createdfile; elem; elem = elem->next)
663 if (!(elem->flags & MATCHED))
666 for (num_delete = 0, elem = deletedfile; elem; elem = elem->next)
667 if (!(elem->flags & MATCHED))
670 if (num_create == 0 || num_delete == 0)
673 mx = xmalloc(sizeof(*mx) * num_create * num_delete);
674 for (c = 0, dst = createdfile; dst; dst = dst->next) {
675 int base = c * num_delete;
676 if (dst->flags & MATCHED)
678 for (d = 0, src = deletedfile; src; src = src->next) {
679 struct diff_score *m = &mx[base+d];
680 if (src->flags & MATCHED)
684 m->score = estimate_similarity(src, dst);
689 qsort(mx, num_create*num_delete, sizeof(*mx), score_compare);
692 for (c = 0; c < num_create * num_delete; c++) {
695 if ((src->flags & MATCHED) || (dst->flags & MATCHED))
698 "**score ** %d %s %s\n",
699 mx[c].score, src->path, dst->path);
703 for (c = 0; c < num_create * num_delete; c++) {
706 if ((src->flags & MATCHED) || (dst->flags & MATCHED))
708 if (mx[c].score < minimum_score)
710 flush_rename_pair(src, dst, mx[c].score);
715 flush_remaining_diff(createdfile, 1);
716 flush_remaining_diff(deletedfile, 0);
717 free_held_diff(createdfile);
718 free_held_diff(deletedfile);
719 createdfile = deletedfile = NULL;
722 int diff_scoreopt_parse(const char *opt)
724 int diglen, num, scale, i;
725 if (opt[0] != '-' || opt[1] != 'M')
726 return -1; /* that is not -M option */
727 diglen = strspn(opt+2, "0123456789");
728 if (diglen == 0 || strlen(opt+2) != diglen)
729 return 0; /* use default */
730 sscanf(opt+2, "%d", &num);
731 for (i = 0, scale = 1; i < diglen; i++)
734 /* user says num divided by scale and we say internally that
735 * is MAX_SCORE * num / scale.
737 return MAX_SCORE * num / scale;
740 void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
741 int diff_raw_output_,
742 const char **pathspec_, int speccnt_)
744 free_held_diff(createdfile);
745 free_held_diff(deletedfile);
746 createdfile = deletedfile = NULL;
748 detect_rename = detect_rename_;
749 reverse_diff = reverse_diff_;
750 pathspec = pathspec_;
751 diff_raw_output = diff_raw_output_;
753 minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE;
756 static const char *git_object_type(unsigned mode)
758 return S_ISDIR(mode) ? "tree" : "blob";
761 void diff_addremove(int addremove, unsigned mode,
762 const unsigned char *sha1,
763 const char *base, const char *path)
765 char concatpath[PATH_MAX];
766 struct diff_spec spec[2], *one, *two;
768 if (0 <= diff_raw_output) {
772 addremove = (addremove == '+' ? '-' : '+');
773 printf("%c%06o %s %s %s%s%c",
776 git_object_type(mode), sha1_to_hex(sha1),
777 base, path, diff_raw_output);
783 memcpy(spec[0].blob_sha1, sha1, 20);
785 spec[0].sha1_valid = !!memcmp(sha1, null_sha1, 20);
786 spec[0].file_valid = 1;
787 spec[1].file_valid = 0;
789 if (addremove == '+') {
790 one = spec + 1; two = spec;
792 one = spec; two = one + 1;
796 strcpy(concatpath, base);
797 strcat(concatpath, path);
799 hold_diff(path ? concatpath : base, one, two);
802 void diff_change(unsigned old_mode, unsigned new_mode,
803 const unsigned char *old_sha1,
804 const unsigned char *new_sha1,
805 const char *base, const char *path) {
806 char concatpath[PATH_MAX];
807 struct diff_spec spec[2];
809 if (0 <= diff_raw_output) {
811 strcpy(old_hex, sha1_to_hex(old_sha1));
816 printf("*%06o->%06o %s %s->%s %s%s%c",
818 git_object_type(new_mode),
819 sha1_to_hex(new_sha1), old_hex,
820 base, path, diff_raw_output);
822 printf("*%06o->%06o %s %s->%s %s%s%c",
824 git_object_type(new_mode),
825 old_hex, sha1_to_hex(new_sha1),
826 base, path, diff_raw_output);
829 if (S_ISDIR(new_mode))
833 strcpy(concatpath, base);
834 strcat(concatpath, path);
837 memcpy(spec[0].blob_sha1, old_sha1, 20);
838 spec[0].mode = old_mode;
839 memcpy(spec[1].blob_sha1, new_sha1, 20);
840 spec[1].mode = new_mode;
841 spec[0].sha1_valid = !!memcmp(old_sha1, null_sha1, 20);
842 spec[1].sha1_valid = !!memcmp(new_sha1, null_sha1, 20);
843 spec[1].file_valid = spec[0].file_valid = 1;
845 /* We do not look at changed files as candidate for
846 * rename detection ever.
848 run_external_diff(path ? concatpath : base, NULL,
849 &spec[0], &spec[1], -1);
852 void diff_unmerge(const char *path)
854 if (0 <= diff_raw_output) {
855 printf("U %s%c", path, diff_raw_output);
858 run_external_diff(path, NULL, NULL, NULL, -1);