]> asedeno.scripts.mit.edu Git - git.git/blob - builtin-blame.c
Merge branch 'master' into ph/strbuf
[git.git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19 #include "path-list.h"
20 #include "mailmap.h"
21 #include "strbuf.h"
22
23 static char blame_usage[] =
24 "git-blame [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-p] [-w] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
25 "  -c                  Use the same output mode as git-annotate (Default: off)\n"
26 "  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
27 "  -l                  Show long commit SHA1 (Default: off)\n"
28 "  --root              Do not treat root commits as boundaries (Default: off)\n"
29 "  -t                  Show raw timestamp (Default: off)\n"
30 "  -f, --show-name     Show original filename (Default: auto)\n"
31 "  -n, --show-number   Show original linenumber (Default: off)\n"
32 "  -s                  Suppress author name and timestamp (Default: off)\n"
33 "  -p, --porcelain     Show in a format designed for machine consumption\n"
34 "  -w                  Ignore whitespace differences\n"
35 "  -L n,m              Process only line range n,m, counting from 1\n"
36 "  -M, -C              Find line movements within and across files\n"
37 "  --incremental       Show blame entries as we find them, incrementally\n"
38 "  --contents file     Use <file>'s contents as the final image\n"
39 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
40
41 static int longest_file;
42 static int longest_author;
43 static int max_orig_digits;
44 static int max_digits;
45 static int max_score_digits;
46 static int show_root;
47 static int blank_boundary;
48 static int incremental;
49 static int cmd_is_annotate;
50 static int xdl_opts = XDF_NEED_MINIMAL;
51 static struct path_list mailmap;
52
53 #ifndef DEBUG
54 #define DEBUG 0
55 #endif
56
57 /* stats */
58 static int num_read_blob;
59 static int num_get_patch;
60 static int num_commits;
61
62 #define PICKAXE_BLAME_MOVE              01
63 #define PICKAXE_BLAME_COPY              02
64 #define PICKAXE_BLAME_COPY_HARDER       04
65 #define PICKAXE_BLAME_COPY_HARDEST      010
66
67 /*
68  * blame for a blame_entry with score lower than these thresholds
69  * is not passed to the parent using move/copy logic.
70  */
71 static unsigned blame_move_score;
72 static unsigned blame_copy_score;
73 #define BLAME_DEFAULT_MOVE_SCORE        20
74 #define BLAME_DEFAULT_COPY_SCORE        40
75
76 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
77 #define METAINFO_SHOWN          (1u<<12)
78 #define MORE_THAN_ONE_PATH      (1u<<13)
79
80 /*
81  * One blob in a commit that is being suspected
82  */
83 struct origin {
84         int refcnt;
85         struct commit *commit;
86         mmfile_t file;
87         unsigned char blob_sha1[20];
88         char path[FLEX_ARRAY];
89 };
90
91 /*
92  * Given an origin, prepare mmfile_t structure to be used by the
93  * diff machinery
94  */
95 static char *fill_origin_blob(struct origin *o, mmfile_t *file)
96 {
97         if (!o->file.ptr) {
98                 enum object_type type;
99                 num_read_blob++;
100                 file->ptr = read_sha1_file(o->blob_sha1, &type,
101                                            (unsigned long *)(&(file->size)));
102                 if (!file->ptr)
103                         die("Cannot read blob %s for path %s",
104                             sha1_to_hex(o->blob_sha1),
105                             o->path);
106                 o->file = *file;
107         }
108         else
109                 *file = o->file;
110         return file->ptr;
111 }
112
113 /*
114  * Origin is refcounted and usually we keep the blob contents to be
115  * reused.
116  */
117 static inline struct origin *origin_incref(struct origin *o)
118 {
119         if (o)
120                 o->refcnt++;
121         return o;
122 }
123
124 static void origin_decref(struct origin *o)
125 {
126         if (o && --o->refcnt <= 0) {
127                 if (o->file.ptr)
128                         free(o->file.ptr);
129                 memset(o, 0, sizeof(*o));
130                 free(o);
131         }
132 }
133
134 /*
135  * Each group of lines is described by a blame_entry; it can be split
136  * as we pass blame to the parents.  They form a linked list in the
137  * scoreboard structure, sorted by the target line number.
138  */
139 struct blame_entry {
140         struct blame_entry *prev;
141         struct blame_entry *next;
142
143         /* the first line of this group in the final image;
144          * internally all line numbers are 0 based.
145          */
146         int lno;
147
148         /* how many lines this group has */
149         int num_lines;
150
151         /* the commit that introduced this group into the final image */
152         struct origin *suspect;
153
154         /* true if the suspect is truly guilty; false while we have not
155          * checked if the group came from one of its parents.
156          */
157         char guilty;
158
159         /* the line number of the first line of this group in the
160          * suspect's file; internally all line numbers are 0 based.
161          */
162         int s_lno;
163
164         /* how significant this entry is -- cached to avoid
165          * scanning the lines over and over.
166          */
167         unsigned score;
168 };
169
170 /*
171  * The current state of the blame assignment.
172  */
173 struct scoreboard {
174         /* the final commit (i.e. where we started digging from) */
175         struct commit *final;
176
177         const char *path;
178
179         /*
180          * The contents in the final image.
181          * Used by many functions to obtain contents of the nth line,
182          * indexed with scoreboard.lineno[blame_entry.lno].
183          */
184         const char *final_buf;
185         unsigned long final_buf_size;
186
187         /* linked list of blames */
188         struct blame_entry *ent;
189
190         /* look-up a line in the final buffer */
191         int num_lines;
192         int *lineno;
193 };
194
195 static inline int same_suspect(struct origin *a, struct origin *b)
196 {
197         if (a == b)
198                 return 1;
199         if (a->commit != b->commit)
200                 return 0;
201         return !strcmp(a->path, b->path);
202 }
203
204 static void sanity_check_refcnt(struct scoreboard *);
205
206 /*
207  * If two blame entries that are next to each other came from
208  * contiguous lines in the same origin (i.e. <commit, path> pair),
209  * merge them together.
210  */
211 static void coalesce(struct scoreboard *sb)
212 {
213         struct blame_entry *ent, *next;
214
215         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
216                 if (same_suspect(ent->suspect, next->suspect) &&
217                     ent->guilty == next->guilty &&
218                     ent->s_lno + ent->num_lines == next->s_lno) {
219                         ent->num_lines += next->num_lines;
220                         ent->next = next->next;
221                         if (ent->next)
222                                 ent->next->prev = ent;
223                         origin_decref(next->suspect);
224                         free(next);
225                         ent->score = 0;
226                         next = ent; /* again */
227                 }
228         }
229
230         if (DEBUG) /* sanity */
231                 sanity_check_refcnt(sb);
232 }
233
234 /*
235  * Given a commit and a path in it, create a new origin structure.
236  * The callers that add blame to the scoreboard should use
237  * get_origin() to obtain shared, refcounted copy instead of calling
238  * this function directly.
239  */
240 static struct origin *make_origin(struct commit *commit, const char *path)
241 {
242         struct origin *o;
243         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
244         o->commit = commit;
245         o->refcnt = 1;
246         strcpy(o->path, path);
247         return o;
248 }
249
250 /*
251  * Locate an existing origin or create a new one.
252  */
253 static struct origin *get_origin(struct scoreboard *sb,
254                                  struct commit *commit,
255                                  const char *path)
256 {
257         struct blame_entry *e;
258
259         for (e = sb->ent; e; e = e->next) {
260                 if (e->suspect->commit == commit &&
261                     !strcmp(e->suspect->path, path))
262                         return origin_incref(e->suspect);
263         }
264         return make_origin(commit, path);
265 }
266
267 /*
268  * Fill the blob_sha1 field of an origin if it hasn't, so that later
269  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
270  * for an origin is also used to pass the blame for the entire file to
271  * the parent to detect the case where a child's blob is identical to
272  * that of its parent's.
273  */
274 static int fill_blob_sha1(struct origin *origin)
275 {
276         unsigned mode;
277
278         if (!is_null_sha1(origin->blob_sha1))
279                 return 0;
280         if (get_tree_entry(origin->commit->object.sha1,
281                            origin->path,
282                            origin->blob_sha1, &mode))
283                 goto error_out;
284         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
285                 goto error_out;
286         return 0;
287  error_out:
288         hashclr(origin->blob_sha1);
289         return -1;
290 }
291
292 /*
293  * We have an origin -- check if the same path exists in the
294  * parent and return an origin structure to represent it.
295  */
296 static struct origin *find_origin(struct scoreboard *sb,
297                                   struct commit *parent,
298                                   struct origin *origin)
299 {
300         struct origin *porigin = NULL;
301         struct diff_options diff_opts;
302         const char *paths[2];
303
304         if (parent->util) {
305                 /*
306                  * Each commit object can cache one origin in that
307                  * commit.  This is a freestanding copy of origin and
308                  * not refcounted.
309                  */
310                 struct origin *cached = parent->util;
311                 if (!strcmp(cached->path, origin->path)) {
312                         /*
313                          * The same path between origin and its parent
314                          * without renaming -- the most common case.
315                          */
316                         porigin = get_origin(sb, parent, cached->path);
317
318                         /*
319                          * If the origin was newly created (i.e. get_origin
320                          * would call make_origin if none is found in the
321                          * scoreboard), it does not know the blob_sha1,
322                          * so copy it.  Otherwise porigin was in the
323                          * scoreboard and already knows blob_sha1.
324                          */
325                         if (porigin->refcnt == 1)
326                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
327                         return porigin;
328                 }
329                 /* otherwise it was not very useful; free it */
330                 free(parent->util);
331                 parent->util = NULL;
332         }
333
334         /* See if the origin->path is different between parent
335          * and origin first.  Most of the time they are the
336          * same and diff-tree is fairly efficient about this.
337          */
338         diff_setup(&diff_opts);
339         diff_opts.recursive = 1;
340         diff_opts.detect_rename = 0;
341         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
342         paths[0] = origin->path;
343         paths[1] = NULL;
344
345         diff_tree_setup_paths(paths, &diff_opts);
346         if (diff_setup_done(&diff_opts) < 0)
347                 die("diff-setup");
348
349         if (is_null_sha1(origin->commit->object.sha1))
350                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
351         else
352                 diff_tree_sha1(parent->tree->object.sha1,
353                                origin->commit->tree->object.sha1,
354                                "", &diff_opts);
355         diffcore_std(&diff_opts);
356
357         /* It is either one entry that says "modified", or "created",
358          * or nothing.
359          */
360         if (!diff_queued_diff.nr) {
361                 /* The path is the same as parent */
362                 porigin = get_origin(sb, parent, origin->path);
363                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
364         }
365         else if (diff_queued_diff.nr != 1)
366                 die("internal error in blame::find_origin");
367         else {
368                 struct diff_filepair *p = diff_queued_diff.queue[0];
369                 switch (p->status) {
370                 default:
371                         die("internal error in blame::find_origin (%c)",
372                             p->status);
373                 case 'M':
374                         porigin = get_origin(sb, parent, origin->path);
375                         hashcpy(porigin->blob_sha1, p->one->sha1);
376                         break;
377                 case 'A':
378                 case 'T':
379                         /* Did not exist in parent, or type changed */
380                         break;
381                 }
382         }
383         diff_flush(&diff_opts);
384         if (porigin) {
385                 /*
386                  * Create a freestanding copy that is not part of
387                  * the refcounted origin found in the scoreboard, and
388                  * cache it in the commit.
389                  */
390                 struct origin *cached;
391
392                 cached = make_origin(porigin->commit, porigin->path);
393                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
394                 parent->util = cached;
395         }
396         return porigin;
397 }
398
399 /*
400  * We have an origin -- find the path that corresponds to it in its
401  * parent and return an origin structure to represent it.
402  */
403 static struct origin *find_rename(struct scoreboard *sb,
404                                   struct commit *parent,
405                                   struct origin *origin)
406 {
407         struct origin *porigin = NULL;
408         struct diff_options diff_opts;
409         int i;
410         const char *paths[2];
411
412         diff_setup(&diff_opts);
413         diff_opts.recursive = 1;
414         diff_opts.detect_rename = DIFF_DETECT_RENAME;
415         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
416         diff_opts.single_follow = origin->path;
417         paths[0] = NULL;
418         diff_tree_setup_paths(paths, &diff_opts);
419         if (diff_setup_done(&diff_opts) < 0)
420                 die("diff-setup");
421
422         if (is_null_sha1(origin->commit->object.sha1))
423                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
424         else
425                 diff_tree_sha1(parent->tree->object.sha1,
426                                origin->commit->tree->object.sha1,
427                                "", &diff_opts);
428         diffcore_std(&diff_opts);
429
430         for (i = 0; i < diff_queued_diff.nr; i++) {
431                 struct diff_filepair *p = diff_queued_diff.queue[i];
432                 if ((p->status == 'R' || p->status == 'C') &&
433                     !strcmp(p->two->path, origin->path)) {
434                         porigin = get_origin(sb, parent, p->one->path);
435                         hashcpy(porigin->blob_sha1, p->one->sha1);
436                         break;
437                 }
438         }
439         diff_flush(&diff_opts);
440         return porigin;
441 }
442
443 /*
444  * Parsing of patch chunks...
445  */
446 struct chunk {
447         /* line number in postimage; up to but not including this
448          * line is the same as preimage
449          */
450         int same;
451
452         /* preimage line number after this chunk */
453         int p_next;
454
455         /* postimage line number after this chunk */
456         int t_next;
457 };
458
459 struct patch {
460         struct chunk *chunks;
461         int num;
462 };
463
464 struct blame_diff_state {
465         struct xdiff_emit_state xm;
466         struct patch *ret;
467         unsigned hunk_post_context;
468         unsigned hunk_in_pre_context : 1;
469 };
470
471 static void process_u_diff(void *state_, char *line, unsigned long len)
472 {
473         struct blame_diff_state *state = state_;
474         struct chunk *chunk;
475         int off1, off2, len1, len2, num;
476
477         num = state->ret->num;
478         if (len < 4 || line[0] != '@' || line[1] != '@') {
479                 if (state->hunk_in_pre_context && line[0] == ' ')
480                         state->ret->chunks[num - 1].same++;
481                 else {
482                         state->hunk_in_pre_context = 0;
483                         if (line[0] == ' ')
484                                 state->hunk_post_context++;
485                         else
486                                 state->hunk_post_context = 0;
487                 }
488                 return;
489         }
490
491         if (num && state->hunk_post_context) {
492                 chunk = &state->ret->chunks[num - 1];
493                 chunk->p_next -= state->hunk_post_context;
494                 chunk->t_next -= state->hunk_post_context;
495         }
496         state->ret->num = ++num;
497         state->ret->chunks = xrealloc(state->ret->chunks,
498                                       sizeof(struct chunk) * num);
499         chunk = &state->ret->chunks[num - 1];
500         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
501                 state->ret->num--;
502                 return;
503         }
504
505         /* Line numbers in patch output are one based. */
506         off1--;
507         off2--;
508
509         chunk->same = len2 ? off2 : (off2 + 1);
510
511         chunk->p_next = off1 + (len1 ? len1 : 1);
512         chunk->t_next = chunk->same + len2;
513         state->hunk_in_pre_context = 1;
514         state->hunk_post_context = 0;
515 }
516
517 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
518                                     int context)
519 {
520         struct blame_diff_state state;
521         xpparam_t xpp;
522         xdemitconf_t xecfg;
523         xdemitcb_t ecb;
524
525         xpp.flags = xdl_opts;
526         memset(&xecfg, 0, sizeof(xecfg));
527         xecfg.ctxlen = context;
528         ecb.outf = xdiff_outf;
529         ecb.priv = &state;
530         memset(&state, 0, sizeof(state));
531         state.xm.consume = process_u_diff;
532         state.ret = xmalloc(sizeof(struct patch));
533         state.ret->chunks = NULL;
534         state.ret->num = 0;
535
536         xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
537
538         if (state.ret->num) {
539                 struct chunk *chunk;
540                 chunk = &state.ret->chunks[state.ret->num - 1];
541                 chunk->p_next -= state.hunk_post_context;
542                 chunk->t_next -= state.hunk_post_context;
543         }
544         return state.ret;
545 }
546
547 /*
548  * Run diff between two origins and grab the patch output, so that
549  * we can pass blame for lines origin is currently suspected for
550  * to its parent.
551  */
552 static struct patch *get_patch(struct origin *parent, struct origin *origin)
553 {
554         mmfile_t file_p, file_o;
555         struct patch *patch;
556
557         fill_origin_blob(parent, &file_p);
558         fill_origin_blob(origin, &file_o);
559         if (!file_p.ptr || !file_o.ptr)
560                 return NULL;
561         patch = compare_buffer(&file_p, &file_o, 0);
562         num_get_patch++;
563         return patch;
564 }
565
566 static void free_patch(struct patch *p)
567 {
568         free(p->chunks);
569         free(p);
570 }
571
572 /*
573  * Link in a new blame entry to the scoreboard.  Entries that cover the
574  * same line range have been removed from the scoreboard previously.
575  */
576 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
577 {
578         struct blame_entry *ent, *prev = NULL;
579
580         origin_incref(e->suspect);
581
582         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
583                 prev = ent;
584
585         /* prev, if not NULL, is the last one that is below e */
586         e->prev = prev;
587         if (prev) {
588                 e->next = prev->next;
589                 prev->next = e;
590         }
591         else {
592                 e->next = sb->ent;
593                 sb->ent = e;
594         }
595         if (e->next)
596                 e->next->prev = e;
597 }
598
599 /*
600  * src typically is on-stack; we want to copy the information in it to
601  * an malloced blame_entry that is already on the linked list of the
602  * scoreboard.  The origin of dst loses a refcnt while the origin of src
603  * gains one.
604  */
605 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
606 {
607         struct blame_entry *p, *n;
608
609         p = dst->prev;
610         n = dst->next;
611         origin_incref(src->suspect);
612         origin_decref(dst->suspect);
613         memcpy(dst, src, sizeof(*src));
614         dst->prev = p;
615         dst->next = n;
616         dst->score = 0;
617 }
618
619 static const char *nth_line(struct scoreboard *sb, int lno)
620 {
621         return sb->final_buf + sb->lineno[lno];
622 }
623
624 /*
625  * It is known that lines between tlno to same came from parent, and e
626  * has an overlap with that range.  it also is known that parent's
627  * line plno corresponds to e's line tlno.
628  *
629  *                <---- e ----->
630  *                   <------>
631  *                   <------------>
632  *             <------------>
633  *             <------------------>
634  *
635  * Split e into potentially three parts; before this chunk, the chunk
636  * to be blamed for the parent, and after that portion.
637  */
638 static void split_overlap(struct blame_entry *split,
639                           struct blame_entry *e,
640                           int tlno, int plno, int same,
641                           struct origin *parent)
642 {
643         int chunk_end_lno;
644         memset(split, 0, sizeof(struct blame_entry [3]));
645
646         if (e->s_lno < tlno) {
647                 /* there is a pre-chunk part not blamed on parent */
648                 split[0].suspect = origin_incref(e->suspect);
649                 split[0].lno = e->lno;
650                 split[0].s_lno = e->s_lno;
651                 split[0].num_lines = tlno - e->s_lno;
652                 split[1].lno = e->lno + tlno - e->s_lno;
653                 split[1].s_lno = plno;
654         }
655         else {
656                 split[1].lno = e->lno;
657                 split[1].s_lno = plno + (e->s_lno - tlno);
658         }
659
660         if (same < e->s_lno + e->num_lines) {
661                 /* there is a post-chunk part not blamed on parent */
662                 split[2].suspect = origin_incref(e->suspect);
663                 split[2].lno = e->lno + (same - e->s_lno);
664                 split[2].s_lno = e->s_lno + (same - e->s_lno);
665                 split[2].num_lines = e->s_lno + e->num_lines - same;
666                 chunk_end_lno = split[2].lno;
667         }
668         else
669                 chunk_end_lno = e->lno + e->num_lines;
670         split[1].num_lines = chunk_end_lno - split[1].lno;
671
672         /*
673          * if it turns out there is nothing to blame the parent for,
674          * forget about the splitting.  !split[1].suspect signals this.
675          */
676         if (split[1].num_lines < 1)
677                 return;
678         split[1].suspect = origin_incref(parent);
679 }
680
681 /*
682  * split_overlap() divided an existing blame e into up to three parts
683  * in split.  Adjust the linked list of blames in the scoreboard to
684  * reflect the split.
685  */
686 static void split_blame(struct scoreboard *sb,
687                         struct blame_entry *split,
688                         struct blame_entry *e)
689 {
690         struct blame_entry *new_entry;
691
692         if (split[0].suspect && split[2].suspect) {
693                 /* The first part (reuse storage for the existing entry e) */
694                 dup_entry(e, &split[0]);
695
696                 /* The last part -- me */
697                 new_entry = xmalloc(sizeof(*new_entry));
698                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
699                 add_blame_entry(sb, new_entry);
700
701                 /* ... and the middle part -- parent */
702                 new_entry = xmalloc(sizeof(*new_entry));
703                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
704                 add_blame_entry(sb, new_entry);
705         }
706         else if (!split[0].suspect && !split[2].suspect)
707                 /*
708                  * The parent covers the entire area; reuse storage for
709                  * e and replace it with the parent.
710                  */
711                 dup_entry(e, &split[1]);
712         else if (split[0].suspect) {
713                 /* me and then parent */
714                 dup_entry(e, &split[0]);
715
716                 new_entry = xmalloc(sizeof(*new_entry));
717                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
718                 add_blame_entry(sb, new_entry);
719         }
720         else {
721                 /* parent and then me */
722                 dup_entry(e, &split[1]);
723
724                 new_entry = xmalloc(sizeof(*new_entry));
725                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
726                 add_blame_entry(sb, new_entry);
727         }
728
729         if (DEBUG) { /* sanity */
730                 struct blame_entry *ent;
731                 int lno = sb->ent->lno, corrupt = 0;
732
733                 for (ent = sb->ent; ent; ent = ent->next) {
734                         if (lno != ent->lno)
735                                 corrupt = 1;
736                         if (ent->s_lno < 0)
737                                 corrupt = 1;
738                         lno += ent->num_lines;
739                 }
740                 if (corrupt) {
741                         lno = sb->ent->lno;
742                         for (ent = sb->ent; ent; ent = ent->next) {
743                                 printf("L %8d l %8d n %8d\n",
744                                        lno, ent->lno, ent->num_lines);
745                                 lno = ent->lno + ent->num_lines;
746                         }
747                         die("oops");
748                 }
749         }
750 }
751
752 /*
753  * After splitting the blame, the origins used by the
754  * on-stack blame_entry should lose one refcnt each.
755  */
756 static void decref_split(struct blame_entry *split)
757 {
758         int i;
759
760         for (i = 0; i < 3; i++)
761                 origin_decref(split[i].suspect);
762 }
763
764 /*
765  * Helper for blame_chunk().  blame_entry e is known to overlap with
766  * the patch hunk; split it and pass blame to the parent.
767  */
768 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
769                           int tlno, int plno, int same,
770                           struct origin *parent)
771 {
772         struct blame_entry split[3];
773
774         split_overlap(split, e, tlno, plno, same, parent);
775         if (split[1].suspect)
776                 split_blame(sb, split, e);
777         decref_split(split);
778 }
779
780 /*
781  * Find the line number of the last line the target is suspected for.
782  */
783 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
784 {
785         struct blame_entry *e;
786         int last_in_target = -1;
787
788         for (e = sb->ent; e; e = e->next) {
789                 if (e->guilty || !same_suspect(e->suspect, target))
790                         continue;
791                 if (last_in_target < e->s_lno + e->num_lines)
792                         last_in_target = e->s_lno + e->num_lines;
793         }
794         return last_in_target;
795 }
796
797 /*
798  * Process one hunk from the patch between the current suspect for
799  * blame_entry e and its parent.  Find and split the overlap, and
800  * pass blame to the overlapping part to the parent.
801  */
802 static void blame_chunk(struct scoreboard *sb,
803                         int tlno, int plno, int same,
804                         struct origin *target, struct origin *parent)
805 {
806         struct blame_entry *e;
807
808         for (e = sb->ent; e; e = e->next) {
809                 if (e->guilty || !same_suspect(e->suspect, target))
810                         continue;
811                 if (same <= e->s_lno)
812                         continue;
813                 if (tlno < e->s_lno + e->num_lines)
814                         blame_overlap(sb, e, tlno, plno, same, parent);
815         }
816 }
817
818 /*
819  * We are looking at the origin 'target' and aiming to pass blame
820  * for the lines it is suspected to its parent.  Run diff to find
821  * which lines came from parent and pass blame for them.
822  */
823 static int pass_blame_to_parent(struct scoreboard *sb,
824                                 struct origin *target,
825                                 struct origin *parent)
826 {
827         int i, last_in_target, plno, tlno;
828         struct patch *patch;
829
830         last_in_target = find_last_in_target(sb, target);
831         if (last_in_target < 0)
832                 return 1; /* nothing remains for this target */
833
834         patch = get_patch(parent, target);
835         plno = tlno = 0;
836         for (i = 0; i < patch->num; i++) {
837                 struct chunk *chunk = &patch->chunks[i];
838
839                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
840                 plno = chunk->p_next;
841                 tlno = chunk->t_next;
842         }
843         /* The rest (i.e. anything after tlno) are the same as the parent */
844         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
845
846         free_patch(patch);
847         return 0;
848 }
849
850 /*
851  * The lines in blame_entry after splitting blames many times can become
852  * very small and trivial, and at some point it becomes pointless to
853  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
854  * ordinary C program, and it is not worth to say it was copied from
855  * totally unrelated file in the parent.
856  *
857  * Compute how trivial the lines in the blame_entry are.
858  */
859 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
860 {
861         unsigned score;
862         const char *cp, *ep;
863
864         if (e->score)
865                 return e->score;
866
867         score = 1;
868         cp = nth_line(sb, e->lno);
869         ep = nth_line(sb, e->lno + e->num_lines);
870         while (cp < ep) {
871                 unsigned ch = *((unsigned char *)cp);
872                 if (isalnum(ch))
873                         score++;
874                 cp++;
875         }
876         e->score = score;
877         return score;
878 }
879
880 /*
881  * best_so_far[] and this[] are both a split of an existing blame_entry
882  * that passes blame to the parent.  Maintain best_so_far the best split
883  * so far, by comparing this and best_so_far and copying this into
884  * bst_so_far as needed.
885  */
886 static void copy_split_if_better(struct scoreboard *sb,
887                                  struct blame_entry *best_so_far,
888                                  struct blame_entry *this)
889 {
890         int i;
891
892         if (!this[1].suspect)
893                 return;
894         if (best_so_far[1].suspect) {
895                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
896                         return;
897         }
898
899         for (i = 0; i < 3; i++)
900                 origin_incref(this[i].suspect);
901         decref_split(best_so_far);
902         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
903 }
904
905 /*
906  * We are looking at a part of the final image represented by
907  * ent (tlno and same are offset by ent->s_lno).
908  * tlno is where we are looking at in the final image.
909  * up to (but not including) same match preimage.
910  * plno is where we are looking at in the preimage.
911  *
912  * <-------------- final image ---------------------->
913  *       <------ent------>
914  *         ^tlno ^same
915  *    <---------preimage----->
916  *         ^plno
917  *
918  * All line numbers are 0-based.
919  */
920 static void handle_split(struct scoreboard *sb,
921                          struct blame_entry *ent,
922                          int tlno, int plno, int same,
923                          struct origin *parent,
924                          struct blame_entry *split)
925 {
926         if (ent->num_lines <= tlno)
927                 return;
928         if (tlno < same) {
929                 struct blame_entry this[3];
930                 tlno += ent->s_lno;
931                 same += ent->s_lno;
932                 split_overlap(this, ent, tlno, plno, same, parent);
933                 copy_split_if_better(sb, split, this);
934                 decref_split(this);
935         }
936 }
937
938 /*
939  * Find the lines from parent that are the same as ent so that
940  * we can pass blames to it.  file_p has the blob contents for
941  * the parent.
942  */
943 static void find_copy_in_blob(struct scoreboard *sb,
944                               struct blame_entry *ent,
945                               struct origin *parent,
946                               struct blame_entry *split,
947                               mmfile_t *file_p)
948 {
949         const char *cp;
950         int cnt;
951         mmfile_t file_o;
952         struct patch *patch;
953         int i, plno, tlno;
954
955         /*
956          * Prepare mmfile that contains only the lines in ent.
957          */
958         cp = nth_line(sb, ent->lno);
959         file_o.ptr = (char*) cp;
960         cnt = ent->num_lines;
961
962         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
963                 if (*cp++ == '\n')
964                         cnt--;
965         }
966         file_o.size = cp - file_o.ptr;
967
968         patch = compare_buffer(file_p, &file_o, 1);
969
970         /*
971          * file_o is a part of final image we are annotating.
972          * file_p partially may match that image.
973          */
974         memset(split, 0, sizeof(struct blame_entry [3]));
975         plno = tlno = 0;
976         for (i = 0; i < patch->num; i++) {
977                 struct chunk *chunk = &patch->chunks[i];
978
979                 handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
980                 plno = chunk->p_next;
981                 tlno = chunk->t_next;
982         }
983         /* remainder, if any, all match the preimage */
984         handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
985         free_patch(patch);
986 }
987
988 /*
989  * See if lines currently target is suspected for can be attributed to
990  * parent.
991  */
992 static int find_move_in_parent(struct scoreboard *sb,
993                                struct origin *target,
994                                struct origin *parent)
995 {
996         int last_in_target, made_progress;
997         struct blame_entry *e, split[3];
998         mmfile_t file_p;
999
1000         last_in_target = find_last_in_target(sb, target);
1001         if (last_in_target < 0)
1002                 return 1; /* nothing remains for this target */
1003
1004         fill_origin_blob(parent, &file_p);
1005         if (!file_p.ptr)
1006                 return 0;
1007
1008         made_progress = 1;
1009         while (made_progress) {
1010                 made_progress = 0;
1011                 for (e = sb->ent; e; e = e->next) {
1012                         if (e->guilty || !same_suspect(e->suspect, target))
1013                                 continue;
1014                         find_copy_in_blob(sb, e, parent, split, &file_p);
1015                         if (split[1].suspect &&
1016                             blame_move_score < ent_score(sb, &split[1])) {
1017                                 split_blame(sb, split, e);
1018                                 made_progress = 1;
1019                         }
1020                         decref_split(split);
1021                 }
1022         }
1023         return 0;
1024 }
1025
1026 struct blame_list {
1027         struct blame_entry *ent;
1028         struct blame_entry split[3];
1029 };
1030
1031 /*
1032  * Count the number of entries the target is suspected for,
1033  * and prepare a list of entry and the best split.
1034  */
1035 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1036                                            struct origin *target,
1037                                            int *num_ents_p)
1038 {
1039         struct blame_entry *e;
1040         int num_ents, i;
1041         struct blame_list *blame_list = NULL;
1042
1043         for (e = sb->ent, num_ents = 0; e; e = e->next)
1044                 if (!e->guilty && same_suspect(e->suspect, target))
1045                         num_ents++;
1046         if (num_ents) {
1047                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1048                 for (e = sb->ent, i = 0; e; e = e->next)
1049                         if (!e->guilty && same_suspect(e->suspect, target))
1050                                 blame_list[i++].ent = e;
1051         }
1052         *num_ents_p = num_ents;
1053         return blame_list;
1054 }
1055
1056 /*
1057  * For lines target is suspected for, see if we can find code movement
1058  * across file boundary from the parent commit.  porigin is the path
1059  * in the parent we already tried.
1060  */
1061 static int find_copy_in_parent(struct scoreboard *sb,
1062                                struct origin *target,
1063                                struct commit *parent,
1064                                struct origin *porigin,
1065                                int opt)
1066 {
1067         struct diff_options diff_opts;
1068         const char *paths[1];
1069         int i, j;
1070         int retval;
1071         struct blame_list *blame_list;
1072         int num_ents;
1073
1074         blame_list = setup_blame_list(sb, target, &num_ents);
1075         if (!blame_list)
1076                 return 1; /* nothing remains for this target */
1077
1078         diff_setup(&diff_opts);
1079         diff_opts.recursive = 1;
1080         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1081
1082         paths[0] = NULL;
1083         diff_tree_setup_paths(paths, &diff_opts);
1084         if (diff_setup_done(&diff_opts) < 0)
1085                 die("diff-setup");
1086
1087         /* Try "find copies harder" on new path if requested;
1088          * we do not want to use diffcore_rename() actually to
1089          * match things up; find_copies_harder is set only to
1090          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1091          * and this code needs to be after diff_setup_done(), which
1092          * usually makes find-copies-harder imply copy detection.
1093          */
1094         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1095             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1096                 && (!porigin || strcmp(target->path, porigin->path))))
1097                 diff_opts.find_copies_harder = 1;
1098
1099         if (is_null_sha1(target->commit->object.sha1))
1100                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1101         else
1102                 diff_tree_sha1(parent->tree->object.sha1,
1103                                target->commit->tree->object.sha1,
1104                                "", &diff_opts);
1105
1106         if (!diff_opts.find_copies_harder)
1107                 diffcore_std(&diff_opts);
1108
1109         retval = 0;
1110         while (1) {
1111                 int made_progress = 0;
1112
1113                 for (i = 0; i < diff_queued_diff.nr; i++) {
1114                         struct diff_filepair *p = diff_queued_diff.queue[i];
1115                         struct origin *norigin;
1116                         mmfile_t file_p;
1117                         struct blame_entry this[3];
1118
1119                         if (!DIFF_FILE_VALID(p->one))
1120                                 continue; /* does not exist in parent */
1121                         if (porigin && !strcmp(p->one->path, porigin->path))
1122                                 /* find_move already dealt with this path */
1123                                 continue;
1124
1125                         norigin = get_origin(sb, parent, p->one->path);
1126                         hashcpy(norigin->blob_sha1, p->one->sha1);
1127                         fill_origin_blob(norigin, &file_p);
1128                         if (!file_p.ptr)
1129                                 continue;
1130
1131                         for (j = 0; j < num_ents; j++) {
1132                                 find_copy_in_blob(sb, blame_list[j].ent,
1133                                                   norigin, this, &file_p);
1134                                 copy_split_if_better(sb, blame_list[j].split,
1135                                                      this);
1136                                 decref_split(this);
1137                         }
1138                         origin_decref(norigin);
1139                 }
1140
1141                 for (j = 0; j < num_ents; j++) {
1142                         struct blame_entry *split = blame_list[j].split;
1143                         if (split[1].suspect &&
1144                             blame_copy_score < ent_score(sb, &split[1])) {
1145                                 split_blame(sb, split, blame_list[j].ent);
1146                                 made_progress = 1;
1147                         }
1148                         decref_split(split);
1149                 }
1150                 free(blame_list);
1151
1152                 if (!made_progress)
1153                         break;
1154                 blame_list = setup_blame_list(sb, target, &num_ents);
1155                 if (!blame_list) {
1156                         retval = 1;
1157                         break;
1158                 }
1159         }
1160         diff_flush(&diff_opts);
1161
1162         return retval;
1163 }
1164
1165 /*
1166  * The blobs of origin and porigin exactly match, so everything
1167  * origin is suspected for can be blamed on the parent.
1168  */
1169 static void pass_whole_blame(struct scoreboard *sb,
1170                              struct origin *origin, struct origin *porigin)
1171 {
1172         struct blame_entry *e;
1173
1174         if (!porigin->file.ptr && origin->file.ptr) {
1175                 /* Steal its file */
1176                 porigin->file = origin->file;
1177                 origin->file.ptr = NULL;
1178         }
1179         for (e = sb->ent; e; e = e->next) {
1180                 if (!same_suspect(e->suspect, origin))
1181                         continue;
1182                 origin_incref(porigin);
1183                 origin_decref(e->suspect);
1184                 e->suspect = porigin;
1185         }
1186 }
1187
1188 #define MAXPARENT 16
1189
1190 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1191 {
1192         int i, pass;
1193         struct commit *commit = origin->commit;
1194         struct commit_list *parent;
1195         struct origin *parent_origin[MAXPARENT], *porigin;
1196
1197         memset(parent_origin, 0, sizeof(parent_origin));
1198
1199         /* The first pass looks for unrenamed path to optimize for
1200          * common cases, then we look for renames in the second pass.
1201          */
1202         for (pass = 0; pass < 2; pass++) {
1203                 struct origin *(*find)(struct scoreboard *,
1204                                        struct commit *, struct origin *);
1205                 find = pass ? find_rename : find_origin;
1206
1207                 for (i = 0, parent = commit->parents;
1208                      i < MAXPARENT && parent;
1209                      parent = parent->next, i++) {
1210                         struct commit *p = parent->item;
1211                         int j, same;
1212
1213                         if (parent_origin[i])
1214                                 continue;
1215                         if (parse_commit(p))
1216                                 continue;
1217                         porigin = find(sb, p, origin);
1218                         if (!porigin)
1219                                 continue;
1220                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1221                                 pass_whole_blame(sb, origin, porigin);
1222                                 origin_decref(porigin);
1223                                 goto finish;
1224                         }
1225                         for (j = same = 0; j < i; j++)
1226                                 if (parent_origin[j] &&
1227                                     !hashcmp(parent_origin[j]->blob_sha1,
1228                                              porigin->blob_sha1)) {
1229                                         same = 1;
1230                                         break;
1231                                 }
1232                         if (!same)
1233                                 parent_origin[i] = porigin;
1234                         else
1235                                 origin_decref(porigin);
1236                 }
1237         }
1238
1239         num_commits++;
1240         for (i = 0, parent = commit->parents;
1241              i < MAXPARENT && parent;
1242              parent = parent->next, i++) {
1243                 struct origin *porigin = parent_origin[i];
1244                 if (!porigin)
1245                         continue;
1246                 if (pass_blame_to_parent(sb, origin, porigin))
1247                         goto finish;
1248         }
1249
1250         /*
1251          * Optionally find moves in parents' files.
1252          */
1253         if (opt & PICKAXE_BLAME_MOVE)
1254                 for (i = 0, parent = commit->parents;
1255                      i < MAXPARENT && parent;
1256                      parent = parent->next, i++) {
1257                         struct origin *porigin = parent_origin[i];
1258                         if (!porigin)
1259                                 continue;
1260                         if (find_move_in_parent(sb, origin, porigin))
1261                                 goto finish;
1262                 }
1263
1264         /*
1265          * Optionally find copies from parents' files.
1266          */
1267         if (opt & PICKAXE_BLAME_COPY)
1268                 for (i = 0, parent = commit->parents;
1269                      i < MAXPARENT && parent;
1270                      parent = parent->next, i++) {
1271                         struct origin *porigin = parent_origin[i];
1272                         if (find_copy_in_parent(sb, origin, parent->item,
1273                                                 porigin, opt))
1274                                 goto finish;
1275                 }
1276
1277  finish:
1278         for (i = 0; i < MAXPARENT; i++)
1279                 origin_decref(parent_origin[i]);
1280 }
1281
1282 /*
1283  * Information on commits, used for output.
1284  */
1285 struct commit_info
1286 {
1287         const char *author;
1288         const char *author_mail;
1289         unsigned long author_time;
1290         const char *author_tz;
1291
1292         /* filled only when asked for details */
1293         const char *committer;
1294         const char *committer_mail;
1295         unsigned long committer_time;
1296         const char *committer_tz;
1297
1298         const char *summary;
1299 };
1300
1301 /*
1302  * Parse author/committer line in the commit object buffer
1303  */
1304 static void get_ac_line(const char *inbuf, const char *what,
1305                         int bufsz, char *person, const char **mail,
1306                         unsigned long *time, const char **tz)
1307 {
1308         int len, tzlen, maillen;
1309         char *tmp, *endp, *timepos;
1310
1311         tmp = strstr(inbuf, what);
1312         if (!tmp)
1313                 goto error_out;
1314         tmp += strlen(what);
1315         endp = strchr(tmp, '\n');
1316         if (!endp)
1317                 len = strlen(tmp);
1318         else
1319                 len = endp - tmp;
1320         if (bufsz <= len) {
1321         error_out:
1322                 /* Ugh */
1323                 *mail = *tz = "(unknown)";
1324                 *time = 0;
1325                 return;
1326         }
1327         memcpy(person, tmp, len);
1328
1329         tmp = person;
1330         tmp += len;
1331         *tmp = 0;
1332         while (*tmp != ' ')
1333                 tmp--;
1334         *tz = tmp+1;
1335         tzlen = (person+len)-(tmp+1);
1336
1337         *tmp = 0;
1338         while (*tmp != ' ')
1339                 tmp--;
1340         *time = strtoul(tmp, NULL, 10);
1341         timepos = tmp;
1342
1343         *tmp = 0;
1344         while (*tmp != ' ')
1345                 tmp--;
1346         *mail = tmp + 1;
1347         *tmp = 0;
1348         maillen = timepos - tmp;
1349
1350         if (!mailmap.nr)
1351                 return;
1352
1353         /*
1354          * mailmap expansion may make the name longer.
1355          * make room by pushing stuff down.
1356          */
1357         tmp = person + bufsz - (tzlen + 1);
1358         memmove(tmp, *tz, tzlen);
1359         tmp[tzlen] = 0;
1360         *tz = tmp;
1361
1362         tmp = tmp - (maillen + 1);
1363         memmove(tmp, *mail, maillen);
1364         tmp[maillen] = 0;
1365         *mail = tmp;
1366
1367         /*
1368          * Now, convert e-mail using mailmap
1369          */
1370         map_email(&mailmap, tmp + 1, person, tmp-person-1);
1371 }
1372
1373 static void get_commit_info(struct commit *commit,
1374                             struct commit_info *ret,
1375                             int detailed)
1376 {
1377         int len;
1378         char *tmp, *endp;
1379         static char author_buf[1024];
1380         static char committer_buf[1024];
1381         static char summary_buf[1024];
1382
1383         /*
1384          * We've operated without save_commit_buffer, so
1385          * we now need to populate them for output.
1386          */
1387         if (!commit->buffer) {
1388                 enum object_type type;
1389                 unsigned long size;
1390                 commit->buffer =
1391                         read_sha1_file(commit->object.sha1, &type, &size);
1392                 if (!commit->buffer)
1393                         die("Cannot read commit %s",
1394                             sha1_to_hex(commit->object.sha1));
1395         }
1396         ret->author = author_buf;
1397         get_ac_line(commit->buffer, "\nauthor ",
1398                     sizeof(author_buf), author_buf, &ret->author_mail,
1399                     &ret->author_time, &ret->author_tz);
1400
1401         if (!detailed)
1402                 return;
1403
1404         ret->committer = committer_buf;
1405         get_ac_line(commit->buffer, "\ncommitter ",
1406                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1407                     &ret->committer_time, &ret->committer_tz);
1408
1409         ret->summary = summary_buf;
1410         tmp = strstr(commit->buffer, "\n\n");
1411         if (!tmp) {
1412         error_out:
1413                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1414                 return;
1415         }
1416         tmp += 2;
1417         endp = strchr(tmp, '\n');
1418         if (!endp)
1419                 endp = tmp + strlen(tmp);
1420         len = endp - tmp;
1421         if (len >= sizeof(summary_buf) || len == 0)
1422                 goto error_out;
1423         memcpy(summary_buf, tmp, len);
1424         summary_buf[len] = 0;
1425 }
1426
1427 /*
1428  * To allow LF and other nonportable characters in pathnames,
1429  * they are c-style quoted as needed.
1430  */
1431 static void write_filename_info(const char *path)
1432 {
1433         printf("filename ");
1434         write_name_quoted(NULL, 0, path, 1, stdout);
1435         putchar('\n');
1436 }
1437
1438 /*
1439  * The blame_entry is found to be guilty for the range.  Mark it
1440  * as such, and show it in incremental output.
1441  */
1442 static void found_guilty_entry(struct blame_entry *ent)
1443 {
1444         if (ent->guilty)
1445                 return;
1446         ent->guilty = 1;
1447         if (incremental) {
1448                 struct origin *suspect = ent->suspect;
1449
1450                 printf("%s %d %d %d\n",
1451                        sha1_to_hex(suspect->commit->object.sha1),
1452                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1453                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1454                         struct commit_info ci;
1455                         suspect->commit->object.flags |= METAINFO_SHOWN;
1456                         get_commit_info(suspect->commit, &ci, 1);
1457                         printf("author %s\n", ci.author);
1458                         printf("author-mail %s\n", ci.author_mail);
1459                         printf("author-time %lu\n", ci.author_time);
1460                         printf("author-tz %s\n", ci.author_tz);
1461                         printf("committer %s\n", ci.committer);
1462                         printf("committer-mail %s\n", ci.committer_mail);
1463                         printf("committer-time %lu\n", ci.committer_time);
1464                         printf("committer-tz %s\n", ci.committer_tz);
1465                         printf("summary %s\n", ci.summary);
1466                         if (suspect->commit->object.flags & UNINTERESTING)
1467                                 printf("boundary\n");
1468                 }
1469                 write_filename_info(suspect->path);
1470                 maybe_flush_or_die(stdout, "stdout");
1471         }
1472 }
1473
1474 /*
1475  * The main loop -- while the scoreboard has lines whose true origin
1476  * is still unknown, pick one blame_entry, and allow its current
1477  * suspect to pass blames to its parents.
1478  */
1479 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1480 {
1481         while (1) {
1482                 struct blame_entry *ent;
1483                 struct commit *commit;
1484                 struct origin *suspect = NULL;
1485
1486                 /* find one suspect to break down */
1487                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1488                         if (!ent->guilty)
1489                                 suspect = ent->suspect;
1490                 if (!suspect)
1491                         return; /* all done */
1492
1493                 /*
1494                  * We will use this suspect later in the loop,
1495                  * so hold onto it in the meantime.
1496                  */
1497                 origin_incref(suspect);
1498                 commit = suspect->commit;
1499                 if (!commit->object.parsed)
1500                         parse_commit(commit);
1501                 if (!(commit->object.flags & UNINTERESTING) &&
1502                     !(revs->max_age != -1 && commit->date < revs->max_age))
1503                         pass_blame(sb, suspect, opt);
1504                 else {
1505                         commit->object.flags |= UNINTERESTING;
1506                         if (commit->object.parsed)
1507                                 mark_parents_uninteresting(commit);
1508                 }
1509                 /* treat root commit as boundary */
1510                 if (!commit->parents && !show_root)
1511                         commit->object.flags |= UNINTERESTING;
1512
1513                 /* Take responsibility for the remaining entries */
1514                 for (ent = sb->ent; ent; ent = ent->next)
1515                         if (same_suspect(ent->suspect, suspect))
1516                                 found_guilty_entry(ent);
1517                 origin_decref(suspect);
1518
1519                 if (DEBUG) /* sanity */
1520                         sanity_check_refcnt(sb);
1521         }
1522 }
1523
1524 static const char *format_time(unsigned long time, const char *tz_str,
1525                                int show_raw_time)
1526 {
1527         static char time_buf[128];
1528         time_t t = time;
1529         int minutes, tz;
1530         struct tm *tm;
1531
1532         if (show_raw_time) {
1533                 sprintf(time_buf, "%lu %s", time, tz_str);
1534                 return time_buf;
1535         }
1536
1537         tz = atoi(tz_str);
1538         minutes = tz < 0 ? -tz : tz;
1539         minutes = (minutes / 100)*60 + (minutes % 100);
1540         minutes = tz < 0 ? -minutes : minutes;
1541         t = time + minutes * 60;
1542         tm = gmtime(&t);
1543
1544         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1545         strcat(time_buf, tz_str);
1546         return time_buf;
1547 }
1548
1549 #define OUTPUT_ANNOTATE_COMPAT  001
1550 #define OUTPUT_LONG_OBJECT_NAME 002
1551 #define OUTPUT_RAW_TIMESTAMP    004
1552 #define OUTPUT_PORCELAIN        010
1553 #define OUTPUT_SHOW_NAME        020
1554 #define OUTPUT_SHOW_NUMBER      040
1555 #define OUTPUT_SHOW_SCORE      0100
1556 #define OUTPUT_NO_AUTHOR       0200
1557
1558 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1559 {
1560         int cnt;
1561         const char *cp;
1562         struct origin *suspect = ent->suspect;
1563         char hex[41];
1564
1565         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1566         printf("%s%c%d %d %d\n",
1567                hex,
1568                ent->guilty ? ' ' : '*', // purely for debugging
1569                ent->s_lno + 1,
1570                ent->lno + 1,
1571                ent->num_lines);
1572         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1573                 struct commit_info ci;
1574                 suspect->commit->object.flags |= METAINFO_SHOWN;
1575                 get_commit_info(suspect->commit, &ci, 1);
1576                 printf("author %s\n", ci.author);
1577                 printf("author-mail %s\n", ci.author_mail);
1578                 printf("author-time %lu\n", ci.author_time);
1579                 printf("author-tz %s\n", ci.author_tz);
1580                 printf("committer %s\n", ci.committer);
1581                 printf("committer-mail %s\n", ci.committer_mail);
1582                 printf("committer-time %lu\n", ci.committer_time);
1583                 printf("committer-tz %s\n", ci.committer_tz);
1584                 write_filename_info(suspect->path);
1585                 printf("summary %s\n", ci.summary);
1586                 if (suspect->commit->object.flags & UNINTERESTING)
1587                         printf("boundary\n");
1588         }
1589         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1590                 write_filename_info(suspect->path);
1591
1592         cp = nth_line(sb, ent->lno);
1593         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1594                 char ch;
1595                 if (cnt)
1596                         printf("%s %d %d\n", hex,
1597                                ent->s_lno + 1 + cnt,
1598                                ent->lno + 1 + cnt);
1599                 putchar('\t');
1600                 do {
1601                         ch = *cp++;
1602                         putchar(ch);
1603                 } while (ch != '\n' &&
1604                          cp < sb->final_buf + sb->final_buf_size);
1605         }
1606 }
1607
1608 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1609 {
1610         int cnt;
1611         const char *cp;
1612         struct origin *suspect = ent->suspect;
1613         struct commit_info ci;
1614         char hex[41];
1615         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1616
1617         get_commit_info(suspect->commit, &ci, 1);
1618         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1619
1620         cp = nth_line(sb, ent->lno);
1621         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1622                 char ch;
1623                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1624
1625                 if (suspect->commit->object.flags & UNINTERESTING) {
1626                         if (blank_boundary)
1627                                 memset(hex, ' ', length);
1628                         else if (!cmd_is_annotate) {
1629                                 length--;
1630                                 putchar('^');
1631                         }
1632                 }
1633
1634                 printf("%.*s", length, hex);
1635                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1636                         printf("\t(%10s\t%10s\t%d)", ci.author,
1637                                format_time(ci.author_time, ci.author_tz,
1638                                            show_raw_time),
1639                                ent->lno + 1 + cnt);
1640                 else {
1641                         if (opt & OUTPUT_SHOW_SCORE)
1642                                 printf(" %*d %02d",
1643                                        max_score_digits, ent->score,
1644                                        ent->suspect->refcnt);
1645                         if (opt & OUTPUT_SHOW_NAME)
1646                                 printf(" %-*.*s", longest_file, longest_file,
1647                                        suspect->path);
1648                         if (opt & OUTPUT_SHOW_NUMBER)
1649                                 printf(" %*d", max_orig_digits,
1650                                        ent->s_lno + 1 + cnt);
1651
1652                         if (!(opt & OUTPUT_NO_AUTHOR))
1653                                 printf(" (%-*.*s %10s",
1654                                        longest_author, longest_author,
1655                                        ci.author,
1656                                        format_time(ci.author_time,
1657                                                    ci.author_tz,
1658                                                    show_raw_time));
1659                         printf(" %*d) ",
1660                                max_digits, ent->lno + 1 + cnt);
1661                 }
1662                 do {
1663                         ch = *cp++;
1664                         putchar(ch);
1665                 } while (ch != '\n' &&
1666                          cp < sb->final_buf + sb->final_buf_size);
1667         }
1668 }
1669
1670 static void output(struct scoreboard *sb, int option)
1671 {
1672         struct blame_entry *ent;
1673
1674         if (option & OUTPUT_PORCELAIN) {
1675                 for (ent = sb->ent; ent; ent = ent->next) {
1676                         struct blame_entry *oth;
1677                         struct origin *suspect = ent->suspect;
1678                         struct commit *commit = suspect->commit;
1679                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1680                                 continue;
1681                         for (oth = ent->next; oth; oth = oth->next) {
1682                                 if ((oth->suspect->commit != commit) ||
1683                                     !strcmp(oth->suspect->path, suspect->path))
1684                                         continue;
1685                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1686                                 break;
1687                         }
1688                 }
1689         }
1690
1691         for (ent = sb->ent; ent; ent = ent->next) {
1692                 if (option & OUTPUT_PORCELAIN)
1693                         emit_porcelain(sb, ent);
1694                 else {
1695                         emit_other(sb, ent, option);
1696                 }
1697         }
1698 }
1699
1700 /*
1701  * To allow quick access to the contents of nth line in the
1702  * final image, prepare an index in the scoreboard.
1703  */
1704 static int prepare_lines(struct scoreboard *sb)
1705 {
1706         const char *buf = sb->final_buf;
1707         unsigned long len = sb->final_buf_size;
1708         int num = 0, incomplete = 0, bol = 1;
1709
1710         if (len && buf[len-1] != '\n')
1711                 incomplete++; /* incomplete line at the end */
1712         while (len--) {
1713                 if (bol) {
1714                         sb->lineno = xrealloc(sb->lineno,
1715                                               sizeof(int* ) * (num + 1));
1716                         sb->lineno[num] = buf - sb->final_buf;
1717                         bol = 0;
1718                 }
1719                 if (*buf++ == '\n') {
1720                         num++;
1721                         bol = 1;
1722                 }
1723         }
1724         sb->lineno = xrealloc(sb->lineno,
1725                               sizeof(int* ) * (num + incomplete + 1));
1726         sb->lineno[num + incomplete] = buf - sb->final_buf;
1727         sb->num_lines = num + incomplete;
1728         return sb->num_lines;
1729 }
1730
1731 /*
1732  * Add phony grafts for use with -S; this is primarily to
1733  * support git-cvsserver that wants to give a linear history
1734  * to its clients.
1735  */
1736 static int read_ancestry(const char *graft_file)
1737 {
1738         FILE *fp = fopen(graft_file, "r");
1739         char buf[1024];
1740         if (!fp)
1741                 return -1;
1742         while (fgets(buf, sizeof(buf), fp)) {
1743                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1744                 int len = strlen(buf);
1745                 struct commit_graft *graft = read_graft_line(buf, len);
1746                 if (graft)
1747                         register_commit_graft(graft, 0);
1748         }
1749         fclose(fp);
1750         return 0;
1751 }
1752
1753 /*
1754  * How many columns do we need to show line numbers in decimal?
1755  */
1756 static int lineno_width(int lines)
1757 {
1758         int i, width;
1759
1760         for (width = 1, i = 10; i <= lines + 1; width++)
1761                 i *= 10;
1762         return width;
1763 }
1764
1765 /*
1766  * How many columns do we need to show line numbers, authors,
1767  * and filenames?
1768  */
1769 static void find_alignment(struct scoreboard *sb, int *option)
1770 {
1771         int longest_src_lines = 0;
1772         int longest_dst_lines = 0;
1773         unsigned largest_score = 0;
1774         struct blame_entry *e;
1775
1776         for (e = sb->ent; e; e = e->next) {
1777                 struct origin *suspect = e->suspect;
1778                 struct commit_info ci;
1779                 int num;
1780
1781                 if (strcmp(suspect->path, sb->path))
1782                         *option |= OUTPUT_SHOW_NAME;
1783                 num = strlen(suspect->path);
1784                 if (longest_file < num)
1785                         longest_file = num;
1786                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1787                         suspect->commit->object.flags |= METAINFO_SHOWN;
1788                         get_commit_info(suspect->commit, &ci, 1);
1789                         num = strlen(ci.author);
1790                         if (longest_author < num)
1791                                 longest_author = num;
1792                 }
1793                 num = e->s_lno + e->num_lines;
1794                 if (longest_src_lines < num)
1795                         longest_src_lines = num;
1796                 num = e->lno + e->num_lines;
1797                 if (longest_dst_lines < num)
1798                         longest_dst_lines = num;
1799                 if (largest_score < ent_score(sb, e))
1800                         largest_score = ent_score(sb, e);
1801         }
1802         max_orig_digits = lineno_width(longest_src_lines);
1803         max_digits = lineno_width(longest_dst_lines);
1804         max_score_digits = lineno_width(largest_score);
1805 }
1806
1807 /*
1808  * For debugging -- origin is refcounted, and this asserts that
1809  * we do not underflow.
1810  */
1811 static void sanity_check_refcnt(struct scoreboard *sb)
1812 {
1813         int baa = 0;
1814         struct blame_entry *ent;
1815
1816         for (ent = sb->ent; ent; ent = ent->next) {
1817                 /* Nobody should have zero or negative refcnt */
1818                 if (ent->suspect->refcnt <= 0) {
1819                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1820                                 ent->suspect->path,
1821                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1822                                 ent->suspect->refcnt);
1823                         baa = 1;
1824                 }
1825         }
1826         for (ent = sb->ent; ent; ent = ent->next) {
1827                 /* Mark the ones that haven't been checked */
1828                 if (0 < ent->suspect->refcnt)
1829                         ent->suspect->refcnt = -ent->suspect->refcnt;
1830         }
1831         for (ent = sb->ent; ent; ent = ent->next) {
1832                 /*
1833                  * ... then pick each and see if they have the the
1834                  * correct refcnt.
1835                  */
1836                 int found;
1837                 struct blame_entry *e;
1838                 struct origin *suspect = ent->suspect;
1839
1840                 if (0 < suspect->refcnt)
1841                         continue;
1842                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1843                 for (found = 0, e = sb->ent; e; e = e->next) {
1844                         if (e->suspect != suspect)
1845                                 continue;
1846                         found++;
1847                 }
1848                 if (suspect->refcnt != found) {
1849                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1850                                 ent->suspect->path,
1851                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1852                                 ent->suspect->refcnt, found);
1853                         baa = 2;
1854                 }
1855         }
1856         if (baa) {
1857                 int opt = 0160;
1858                 find_alignment(sb, &opt);
1859                 output(sb, opt);
1860                 die("Baa %d!", baa);
1861         }
1862 }
1863
1864 /*
1865  * Used for the command line parsing; check if the path exists
1866  * in the working tree.
1867  */
1868 static int has_path_in_work_tree(const char *path)
1869 {
1870         struct stat st;
1871         return !lstat(path, &st);
1872 }
1873
1874 static unsigned parse_score(const char *arg)
1875 {
1876         char *end;
1877         unsigned long score = strtoul(arg, &end, 10);
1878         if (*end)
1879                 return 0;
1880         return score;
1881 }
1882
1883 static const char *add_prefix(const char *prefix, const char *path)
1884 {
1885         if (!prefix || !prefix[0])
1886                 return path;
1887         return prefix_path(prefix, strlen(prefix), path);
1888 }
1889
1890 /*
1891  * Parsing of (comma separated) one item in the -L option
1892  */
1893 static const char *parse_loc(const char *spec,
1894                              struct scoreboard *sb, long lno,
1895                              long begin, long *ret)
1896 {
1897         char *term;
1898         const char *line;
1899         long num;
1900         int reg_error;
1901         regex_t regexp;
1902         regmatch_t match[1];
1903
1904         /* Allow "-L <something>,+20" to mean starting at <something>
1905          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1906          * <something>.
1907          */
1908         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1909                 num = strtol(spec + 1, &term, 10);
1910                 if (term != spec + 1) {
1911                         if (spec[0] == '-')
1912                                 num = 0 - num;
1913                         if (0 < num)
1914                                 *ret = begin + num - 2;
1915                         else if (!num)
1916                                 *ret = begin;
1917                         else
1918                                 *ret = begin + num;
1919                         return term;
1920                 }
1921                 return spec;
1922         }
1923         num = strtol(spec, &term, 10);
1924         if (term != spec) {
1925                 *ret = num;
1926                 return term;
1927         }
1928         if (spec[0] != '/')
1929                 return spec;
1930
1931         /* it could be a regexp of form /.../ */
1932         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1933                 if (*term == '\\')
1934                         term++;
1935         }
1936         if (*term != '/')
1937                 return spec;
1938
1939         /* try [spec+1 .. term-1] as regexp */
1940         *term = 0;
1941         begin--; /* input is in human terms */
1942         line = nth_line(sb, begin);
1943
1944         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1945             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1946                 const char *cp = line + match[0].rm_so;
1947                 const char *nline;
1948
1949                 while (begin++ < lno) {
1950                         nline = nth_line(sb, begin);
1951                         if (line <= cp && cp < nline)
1952                                 break;
1953                         line = nline;
1954                 }
1955                 *ret = begin;
1956                 regfree(&regexp);
1957                 *term++ = '/';
1958                 return term;
1959         }
1960         else {
1961                 char errbuf[1024];
1962                 regerror(reg_error, &regexp, errbuf, 1024);
1963                 die("-L parameter '%s': %s", spec + 1, errbuf);
1964         }
1965 }
1966
1967 /*
1968  * Parsing of -L option
1969  */
1970 static void prepare_blame_range(struct scoreboard *sb,
1971                                 const char *bottomtop,
1972                                 long lno,
1973                                 long *bottom, long *top)
1974 {
1975         const char *term;
1976
1977         term = parse_loc(bottomtop, sb, lno, 1, bottom);
1978         if (*term == ',') {
1979                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1980                 if (*term)
1981                         usage(blame_usage);
1982         }
1983         if (*term)
1984                 usage(blame_usage);
1985 }
1986
1987 static int git_blame_config(const char *var, const char *value)
1988 {
1989         if (!strcmp(var, "blame.showroot")) {
1990                 show_root = git_config_bool(var, value);
1991                 return 0;
1992         }
1993         if (!strcmp(var, "blame.blankboundary")) {
1994                 blank_boundary = git_config_bool(var, value);
1995                 return 0;
1996         }
1997         return git_default_config(var, value);
1998 }
1999
2000 static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
2001 {
2002         struct commit *commit;
2003         struct origin *origin;
2004         unsigned char head_sha1[20];
2005         struct strbuf buf;
2006         const char *ident;
2007         int fd;
2008         time_t now;
2009         int size, len;
2010         struct cache_entry *ce;
2011         unsigned mode;
2012
2013         if (get_sha1("HEAD", head_sha1))
2014                 die("No such ref: HEAD");
2015
2016         time(&now);
2017         commit = xcalloc(1, sizeof(*commit));
2018         commit->parents = xcalloc(1, sizeof(*commit->parents));
2019         commit->parents->item = lookup_commit_reference(head_sha1);
2020         commit->object.parsed = 1;
2021         commit->date = now;
2022         commit->object.type = OBJ_COMMIT;
2023
2024         origin = make_origin(commit, path);
2025
2026         strbuf_init(&buf);
2027         if (!contents_from || strcmp("-", contents_from)) {
2028                 struct stat st;
2029                 const char *read_from;
2030                 unsigned long fin_size;
2031
2032                 if (contents_from) {
2033                         if (stat(contents_from, &st) < 0)
2034                                 die("Cannot stat %s", contents_from);
2035                         read_from = contents_from;
2036                 }
2037                 else {
2038                         if (lstat(path, &st) < 0)
2039                                 die("Cannot lstat %s", path);
2040                         read_from = path;
2041                 }
2042                 fin_size = xsize_t(st.st_size);
2043                 mode = canon_mode(st.st_mode);
2044                 switch (st.st_mode & S_IFMT) {
2045                 case S_IFREG:
2046                         fd = open(read_from, O_RDONLY);
2047                         if (fd < 0)
2048                                 die("cannot open %s", read_from);
2049                         if (strbuf_read(&buf, fd) != xsize_t(st.st_size))
2050                                 die("cannot read %s", read_from);
2051                         break;
2052                 case S_IFLNK:
2053                         if (readlink(read_from, buf.buf, buf.alloc) != fin_size)
2054                                 die("cannot readlink %s", read_from);
2055                         buf.len = fin_size;
2056                         break;
2057                 default:
2058                         die("unsupported file type %s", read_from);
2059                 }
2060         }
2061         else {
2062                 /* Reading from stdin */
2063                 contents_from = "standard input";
2064                 mode = 0;
2065                 if (strbuf_read(&buf, 0) < 0)
2066                         die("read error %s from stdin", strerror(errno));
2067         }
2068         origin->file.ptr = buf.buf;
2069         origin->file.size = buf.len;
2070         pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
2071         commit->util = origin;
2072
2073         /*
2074          * Read the current index, replace the path entry with
2075          * origin->blob_sha1 without mucking with its mode or type
2076          * bits; we are not going to write this index out -- we just
2077          * want to run "diff-index --cached".
2078          */
2079         discard_cache();
2080         read_cache();
2081
2082         len = strlen(path);
2083         if (!mode) {
2084                 int pos = cache_name_pos(path, len);
2085                 if (0 <= pos)
2086                         mode = ntohl(active_cache[pos]->ce_mode);
2087                 else
2088                         /* Let's not bother reading from HEAD tree */
2089                         mode = S_IFREG | 0644;
2090         }
2091         size = cache_entry_size(len);
2092         ce = xcalloc(1, size);
2093         hashcpy(ce->sha1, origin->blob_sha1);
2094         memcpy(ce->name, path, len);
2095         ce->ce_flags = create_ce_flags(len, 0);
2096         ce->ce_mode = create_ce_mode(mode);
2097         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2098
2099         /*
2100          * We are not going to write this out, so this does not matter
2101          * right now, but someday we might optimize diff-index --cached
2102          * with cache-tree information.
2103          */
2104         cache_tree_invalidate_path(active_cache_tree, path);
2105
2106         commit->buffer = xmalloc(400);
2107         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2108         snprintf(commit->buffer, 400,
2109                 "tree 0000000000000000000000000000000000000000\n"
2110                 "parent %s\n"
2111                 "author %s\n"
2112                 "committer %s\n\n"
2113                 "Version of %s from %s\n",
2114                 sha1_to_hex(head_sha1),
2115                 ident, ident, path, contents_from ? contents_from : path);
2116         return commit;
2117 }
2118
2119 int cmd_blame(int argc, const char **argv, const char *prefix)
2120 {
2121         struct rev_info revs;
2122         const char *path;
2123         struct scoreboard sb;
2124         struct origin *o;
2125         struct blame_entry *ent;
2126         int i, seen_dashdash, unk, opt;
2127         long bottom, top, lno;
2128         int output_option = 0;
2129         int show_stats = 0;
2130         const char *revs_file = NULL;
2131         const char *final_commit_name = NULL;
2132         enum object_type type;
2133         const char *bottomtop = NULL;
2134         const char *contents_from = NULL;
2135
2136         cmd_is_annotate = !strcmp(argv[0], "annotate");
2137
2138         git_config(git_blame_config);
2139         save_commit_buffer = 0;
2140
2141         opt = 0;
2142         seen_dashdash = 0;
2143         for (unk = i = 1; i < argc; i++) {
2144                 const char *arg = argv[i];
2145                 if (*arg != '-')
2146                         break;
2147                 else if (!strcmp("-b", arg))
2148                         blank_boundary = 1;
2149                 else if (!strcmp("--root", arg))
2150                         show_root = 1;
2151                 else if (!strcmp(arg, "--show-stats"))
2152                         show_stats = 1;
2153                 else if (!strcmp("-c", arg))
2154                         output_option |= OUTPUT_ANNOTATE_COMPAT;
2155                 else if (!strcmp("-t", arg))
2156                         output_option |= OUTPUT_RAW_TIMESTAMP;
2157                 else if (!strcmp("-l", arg))
2158                         output_option |= OUTPUT_LONG_OBJECT_NAME;
2159                 else if (!strcmp("-s", arg))
2160                         output_option |= OUTPUT_NO_AUTHOR;
2161                 else if (!strcmp("-w", arg))
2162                         xdl_opts |= XDF_IGNORE_WHITESPACE;
2163                 else if (!strcmp("-S", arg) && ++i < argc)
2164                         revs_file = argv[i];
2165                 else if (!prefixcmp(arg, "-M")) {
2166                         opt |= PICKAXE_BLAME_MOVE;
2167                         blame_move_score = parse_score(arg+2);
2168                 }
2169                 else if (!prefixcmp(arg, "-C")) {
2170                         /*
2171                          * -C enables copy from removed files;
2172                          * -C -C enables copy from existing files, but only
2173                          *       when blaming a new file;
2174                          * -C -C -C enables copy from existing files for
2175                          *          everybody
2176                          */
2177                         if (opt & PICKAXE_BLAME_COPY_HARDER)
2178                                 opt |= PICKAXE_BLAME_COPY_HARDEST;
2179                         if (opt & PICKAXE_BLAME_COPY)
2180                                 opt |= PICKAXE_BLAME_COPY_HARDER;
2181                         opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2182                         blame_copy_score = parse_score(arg+2);
2183                 }
2184                 else if (!prefixcmp(arg, "-L")) {
2185                         if (!arg[2]) {
2186                                 if (++i >= argc)
2187                                         usage(blame_usage);
2188                                 arg = argv[i];
2189                         }
2190                         else
2191                                 arg += 2;
2192                         if (bottomtop)
2193                                 die("More than one '-L n,m' option given");
2194                         bottomtop = arg;
2195                 }
2196                 else if (!strcmp("--contents", arg)) {
2197                         if (++i >= argc)
2198                                 usage(blame_usage);
2199                         contents_from = argv[i];
2200                 }
2201                 else if (!strcmp("--incremental", arg))
2202                         incremental = 1;
2203                 else if (!strcmp("--score-debug", arg))
2204                         output_option |= OUTPUT_SHOW_SCORE;
2205                 else if (!strcmp("-f", arg) ||
2206                          !strcmp("--show-name", arg))
2207                         output_option |= OUTPUT_SHOW_NAME;
2208                 else if (!strcmp("-n", arg) ||
2209                          !strcmp("--show-number", arg))
2210                         output_option |= OUTPUT_SHOW_NUMBER;
2211                 else if (!strcmp("-p", arg) ||
2212                          !strcmp("--porcelain", arg))
2213                         output_option |= OUTPUT_PORCELAIN;
2214                 else if (!strcmp("--", arg)) {
2215                         seen_dashdash = 1;
2216                         i++;
2217                         break;
2218                 }
2219                 else
2220                         argv[unk++] = arg;
2221         }
2222
2223         if (!incremental)
2224                 setup_pager();
2225
2226         if (!blame_move_score)
2227                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2228         if (!blame_copy_score)
2229                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2230
2231         /*
2232          * We have collected options unknown to us in argv[1..unk]
2233          * which are to be passed to revision machinery if we are
2234          * going to do the "bottom" processing.
2235          *
2236          * The remaining are:
2237          *
2238          * (1) if seen_dashdash, its either
2239          *     "-options -- <path>" or
2240          *     "-options -- <path> <rev>".
2241          *     but the latter is allowed only if there is no
2242          *     options that we passed to revision machinery.
2243          *
2244          * (2) otherwise, we may have "--" somewhere later and
2245          *     might be looking at the first one of multiple 'rev'
2246          *     parameters (e.g. " master ^next ^maint -- path").
2247          *     See if there is a dashdash first, and give the
2248          *     arguments before that to revision machinery.
2249          *     After that there must be one 'path'.
2250          *
2251          * (3) otherwise, its one of the three:
2252          *     "-options <path> <rev>"
2253          *     "-options <rev> <path>"
2254          *     "-options <path>"
2255          *     but again the first one is allowed only if
2256          *     there is no options that we passed to revision
2257          *     machinery.
2258          */
2259
2260         if (seen_dashdash) {
2261                 /* (1) */
2262                 if (argc <= i)
2263                         usage(blame_usage);
2264                 path = add_prefix(prefix, argv[i]);
2265                 if (i + 1 == argc - 1) {
2266                         if (unk != 1)
2267                                 usage(blame_usage);
2268                         argv[unk++] = argv[i + 1];
2269                 }
2270                 else if (i + 1 != argc)
2271                         /* garbage at end */
2272                         usage(blame_usage);
2273         }
2274         else {
2275                 int j;
2276                 for (j = i; !seen_dashdash && j < argc; j++)
2277                         if (!strcmp(argv[j], "--"))
2278                                 seen_dashdash = j;
2279                 if (seen_dashdash) {
2280                         /* (2) */
2281                         if (seen_dashdash + 1 != argc - 1)
2282                                 usage(blame_usage);
2283                         path = add_prefix(prefix, argv[seen_dashdash + 1]);
2284                         for (j = i; j < seen_dashdash; j++)
2285                                 argv[unk++] = argv[j];
2286                 }
2287                 else {
2288                         /* (3) */
2289                         if (argc <= i)
2290                                 usage(blame_usage);
2291                         path = add_prefix(prefix, argv[i]);
2292                         if (i + 1 == argc - 1) {
2293                                 final_commit_name = argv[i + 1];
2294
2295                                 /* if (unk == 1) we could be getting
2296                                  * old-style
2297                                  */
2298                                 if (unk == 1 && !has_path_in_work_tree(path)) {
2299                                         path = add_prefix(prefix, argv[i + 1]);
2300                                         final_commit_name = argv[i];
2301                                 }
2302                         }
2303                         else if (i != argc - 1)
2304                                 usage(blame_usage); /* garbage at end */
2305
2306                         if (!has_path_in_work_tree(path))
2307                                 die("cannot stat path %s: %s",
2308                                     path, strerror(errno));
2309                 }
2310         }
2311
2312         if (final_commit_name)
2313                 argv[unk++] = final_commit_name;
2314
2315         /*
2316          * Now we got rev and path.  We do not want the path pruning
2317          * but we may want "bottom" processing.
2318          */
2319         argv[unk++] = "--"; /* terminate the rev name */
2320         argv[unk] = NULL;
2321
2322         init_revisions(&revs, NULL);
2323         setup_revisions(unk, argv, &revs, NULL);
2324         memset(&sb, 0, sizeof(sb));
2325
2326         /*
2327          * There must be one and only one positive commit in the
2328          * revs->pending array.
2329          */
2330         for (i = 0; i < revs.pending.nr; i++) {
2331                 struct object *obj = revs.pending.objects[i].item;
2332                 if (obj->flags & UNINTERESTING)
2333                         continue;
2334                 while (obj->type == OBJ_TAG)
2335                         obj = deref_tag(obj, NULL, 0);
2336                 if (obj->type != OBJ_COMMIT)
2337                         die("Non commit %s?",
2338                             revs.pending.objects[i].name);
2339                 if (sb.final)
2340                         die("More than one commit to dig from %s and %s?",
2341                             revs.pending.objects[i].name,
2342                             final_commit_name);
2343                 sb.final = (struct commit *) obj;
2344                 final_commit_name = revs.pending.objects[i].name;
2345         }
2346
2347         if (!sb.final) {
2348                 /*
2349                  * "--not A B -- path" without anything positive;
2350                  * do not default to HEAD, but use the working tree
2351                  * or "--contents".
2352                  */
2353                 sb.final = fake_working_tree_commit(path, contents_from);
2354                 add_pending_object(&revs, &(sb.final->object), ":");
2355         }
2356         else if (contents_from)
2357                 die("Cannot use --contents with final commit object name");
2358
2359         /*
2360          * If we have bottom, this will mark the ancestors of the
2361          * bottom commits we would reach while traversing as
2362          * uninteresting.
2363          */
2364         prepare_revision_walk(&revs);
2365
2366         if (is_null_sha1(sb.final->object.sha1)) {
2367                 char *buf;
2368                 o = sb.final->util;
2369                 buf = xmalloc(o->file.size + 1);
2370                 memcpy(buf, o->file.ptr, o->file.size + 1);
2371                 sb.final_buf = buf;
2372                 sb.final_buf_size = o->file.size;
2373         }
2374         else {
2375                 o = get_origin(&sb, sb.final, path);
2376                 if (fill_blob_sha1(o))
2377                         die("no such path %s in %s", path, final_commit_name);
2378
2379                 sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2380                                               &sb.final_buf_size);
2381                 if (!sb.final_buf)
2382                         die("Cannot read blob %s for path %s",
2383                             sha1_to_hex(o->blob_sha1),
2384                             path);
2385         }
2386         num_read_blob++;
2387         lno = prepare_lines(&sb);
2388
2389         bottom = top = 0;
2390         if (bottomtop)
2391                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2392         if (bottom && top && top < bottom) {
2393                 long tmp;
2394                 tmp = top; top = bottom; bottom = tmp;
2395         }
2396         if (bottom < 1)
2397                 bottom = 1;
2398         if (top < 1)
2399                 top = lno;
2400         bottom--;
2401         if (lno < top)
2402                 die("file %s has only %lu lines", path, lno);
2403
2404         ent = xcalloc(1, sizeof(*ent));
2405         ent->lno = bottom;
2406         ent->num_lines = top - bottom;
2407         ent->suspect = o;
2408         ent->s_lno = bottom;
2409
2410         sb.ent = ent;
2411         sb.path = path;
2412
2413         if (revs_file && read_ancestry(revs_file))
2414                 die("reading graft file %s failed: %s",
2415                     revs_file, strerror(errno));
2416
2417         read_mailmap(&mailmap, ".mailmap", NULL);
2418
2419         assign_blame(&sb, &revs, opt);
2420
2421         if (incremental)
2422                 return 0;
2423
2424         coalesce(&sb);
2425
2426         if (!(output_option & OUTPUT_PORCELAIN))
2427                 find_alignment(&sb, &output_option);
2428
2429         output(&sb, output_option);
2430         free((void *)sb.final_buf);
2431         for (ent = sb.ent; ent; ) {
2432                 struct blame_entry *e = ent->next;
2433                 free(ent);
2434                 ent = e;
2435         }
2436
2437         if (show_stats) {
2438                 printf("num read blob: %d\n", num_read_blob);
2439                 printf("num get patch: %d\n", num_get_patch);
2440                 printf("num commits: %d\n", num_commits);
2441         }
2442         return 0;
2443 }