X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=blobdiff_plain;f=fast-import.c;h=74f08bd554ae4cab1dc9ee38272376e5cbec1200;hb=0476228de500049804ae599f557658c3f9042ad0;hp=901784fe911567752c872e5433f0295768654b5a;hpb=b10b9184afc9e62140d307dcf2f5d7e625f78c79;p=git.git diff --git a/fast-import.c b/fast-import.c index 901784fe9..74f08bd55 100644 --- a/fast-import.c +++ b/fast-import.c @@ -164,12 +164,11 @@ Format of STDIN stream: struct object_entry { + struct pack_idx_entry idx; struct object_entry *next; - uint32_t offset; uint32_t type : TYPE_BITS, pack_id : PACK_ID_BITS, depth : DEPTH_BITS; - unsigned char sha1[20]; }; struct object_entry_pool @@ -192,7 +191,7 @@ struct mark_set struct last_object { struct strbuf data; - uint32_t offset; + off_t offset; unsigned int depth; unsigned no_swap : 1; }; @@ -280,7 +279,8 @@ struct recent_command /* Configured limits on output */ static unsigned long max_depth = 10; -static off_t max_packsize = (1LL << 32) - 1; +static off_t max_packsize; +static uintmax_t big_file_threshold = 512 * 1024 * 1024; static int force_update; static int pack_compression_level = Z_DEFAULT_COMPRESSION; static int pack_compression_seen; @@ -312,9 +312,10 @@ static struct atom_str **atom_table; /* The .pack file being generated */ static unsigned int pack_id; +static struct sha1file *pack_file; static struct packed_git *pack_data; static struct packed_git **all_packs; -static unsigned long pack_size; +static off_t pack_size; /* Table of objects we've written. */ static unsigned int object_entry_alloc = 5000; @@ -520,7 +521,7 @@ static struct object_entry *new_object(unsigned char *sha1) alloc_objects(object_entry_alloc); e = blocks->next_free++; - hashcpy(e->sha1, sha1); + hashcpy(e->idx.sha1, sha1); return e; } @@ -529,7 +530,7 @@ static struct object_entry *find_object(unsigned char *sha1) unsigned int h = sha1[0] << 8 | sha1[1]; struct object_entry *e; for (e = object_table[h]; e; e = e->next) - if (!hashcmp(sha1, e->sha1)) + if (!hashcmp(sha1, e->idx.sha1)) return e; return NULL; } @@ -541,7 +542,7 @@ static struct object_entry *insert_object(unsigned char *sha1) struct object_entry *p = NULL; while (e) { - if (!hashcmp(sha1, e->sha1)) + if (!hashcmp(sha1, e->idx.sha1)) return e; p = e; e = e->next; @@ -549,7 +550,7 @@ static struct object_entry *insert_object(unsigned char *sha1) e = new_object(sha1); e->next = NULL; - e->offset = 0; + e->idx.offset = 0; if (p) p->next = e; else @@ -838,11 +839,12 @@ static void start_packfile(void) p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2); strcpy(p->pack_name, tmpfile); p->pack_fd = pack_fd; + pack_file = sha1fd(pack_fd, p->pack_name); hdr.hdr_signature = htonl(PACK_SIGNATURE); hdr.hdr_version = htonl(2); hdr.hdr_entries = 0; - write_or_die(p->pack_fd, &hdr, sizeof(hdr)); + sha1write(pack_file, &hdr, sizeof(hdr)); pack_data = p; pack_size = sizeof(hdr); @@ -852,67 +854,30 @@ static void start_packfile(void) all_packs[pack_id] = p; } -static int oecmp (const void *a_, const void *b_) -{ - struct object_entry *a = *((struct object_entry**)a_); - struct object_entry *b = *((struct object_entry**)b_); - return hashcmp(a->sha1, b->sha1); -} - -static char *create_index(void) +static const char *create_index(void) { - static char tmpfile[PATH_MAX]; - git_SHA_CTX ctx; - struct sha1file *f; - struct object_entry **idx, **c, **last, *e; + const char *tmpfile; + struct pack_idx_entry **idx, **c, **last; + struct object_entry *e; struct object_entry_pool *o; - uint32_t array[256]; - int i, idx_fd; - /* Build the sorted table of object IDs. */ - idx = xmalloc(object_count * sizeof(struct object_entry*)); + /* Build the table of object IDs. */ + idx = xmalloc(object_count * sizeof(*idx)); c = idx; for (o = blocks; o; o = o->next_pool) for (e = o->next_free; e-- != o->entries;) if (pack_id == e->pack_id) - *c++ = e; + *c++ = &e->idx; last = idx + object_count; if (c != last) die("internal consistency error creating the index"); - qsort(idx, object_count, sizeof(struct object_entry*), oecmp); - /* Generate the fan-out array. */ - c = idx; - for (i = 0; i < 256; i++) { - struct object_entry **next = c; - while (next < last) { - if ((*next)->sha1[0] != i) - break; - next++; - } - array[i] = htonl(next - idx); - c = next; - } - - idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile), - "pack/tmp_idx_XXXXXX"); - f = sha1fd(idx_fd, tmpfile); - sha1write(f, array, 256 * sizeof(int)); - git_SHA1_Init(&ctx); - for (c = idx; c != last; c++) { - uint32_t offset = htonl((*c)->offset); - sha1write(f, &offset, 4); - sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); - git_SHA1_Update(&ctx, (*c)->sha1, 20); - } - sha1write(f, pack_data->sha1, sizeof(pack_data->sha1)); - sha1close(f, NULL, CSUM_FSYNC); + tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1); free(idx); - git_SHA1_Final(pack_data->sha1, &ctx); return tmpfile; } -static char *keep_pack(char *curr_index_name) +static char *keep_pack(const char *curr_index_name) { static char name[PATH_MAX]; static const char *keep_msg = "fast-import"; @@ -934,6 +899,7 @@ static char *keep_pack(char *curr_index_name) get_object_directory(), sha1_to_hex(pack_data->sha1)); if (move_temp_to_file(curr_index_name, name)) die("cannot store index file"); + free((void *)curr_index_name); return name; } @@ -956,15 +922,17 @@ static void end_packfile(void) clear_delta_base_cache(); if (object_count) { + unsigned char cur_pack_sha1[20]; char *idx_name; int i; struct branch *b; struct tag *t; close_pack_windows(pack_data); + sha1close(pack_file, cur_pack_sha1, 0); fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, pack_data->pack_name, object_count, - NULL, 0); + cur_pack_sha1, pack_size); close(pack_data->pack_fd); idx_name = keep_pack(create_index()); @@ -1014,7 +982,7 @@ static void cycle_packfile(void) static size_t encode_header( enum object_type type, - size_t size, + uintmax_t size, unsigned char *hdr) { int n = 1; @@ -1062,25 +1030,21 @@ static int store_object( e = insert_object(sha1); if (mark) insert_mark(mark, e); - if (e->offset) { + if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; } else if (find_sha1_pack(sha1, packed_git)) { e->type = type; e->pack_id = MAX_PACK_ID; - e->offset = 1; /* just not zero! */ + e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[type]++; return 1; } - if (last && last->data.buf && last->depth < max_depth) { + if (last && last->data.buf && last->depth < max_depth && dat->len > 20) { delta = diff_delta(last->data.buf, last->data.len, dat->buf, dat->len, - &deltalen, 0); - if (delta && deltalen >= dat->len) { - free(delta); - delta = NULL; - } + &deltalen, dat->len - 20); } else delta = NULL; @@ -1100,7 +1064,7 @@ static int store_object( deflateEnd(&s); /* Determine if we should auto-checkpoint. */ - if ((pack_size + 60 + s.total_out) > max_packsize + if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize) || (pack_size + 60 + s.total_out) < pack_size) { /* This new object needs to *not* have the current pack_id. */ @@ -1126,36 +1090,40 @@ static int store_object( e->type = type; e->pack_id = pack_id; - e->offset = pack_size; + e->idx.offset = pack_size; object_count++; object_count_by_type[type]++; + crc32_begin(pack_file); + if (delta) { - unsigned long ofs = e->offset - last->offset; + off_t ofs = e->idx.offset - last->offset; unsigned pos = sizeof(hdr) - 1; delta_count_by_type[type]++; e->depth = last->depth + 1; hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr); - write_or_die(pack_data->pack_fd, hdr, hdrlen); + sha1write(pack_file, hdr, hdrlen); pack_size += hdrlen; hdr[pos] = ofs & 127; while (ofs >>= 7) hdr[--pos] = 128 | (--ofs & 127); - write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos); + sha1write(pack_file, hdr + pos, sizeof(hdr) - pos); pack_size += sizeof(hdr) - pos; } else { e->depth = 0; hdrlen = encode_header(type, dat->len, hdr); - write_or_die(pack_data->pack_fd, hdr, hdrlen); + sha1write(pack_file, hdr, hdrlen); pack_size += hdrlen; } - write_or_die(pack_data->pack_fd, out, s.total_out); + sha1write(pack_file, out, s.total_out); pack_size += s.total_out; + e->idx.crc32 = crc32_end(pack_file); + free(out); free(delta); if (last) { @@ -1164,12 +1132,137 @@ static int store_object( } else { strbuf_swap(&last->data, dat); } - last->offset = e->offset; + last->offset = e->idx.offset; last->depth = e->depth; } return 0; } +static void truncate_pack(off_t to, git_SHA_CTX *ctx) +{ + if (ftruncate(pack_data->pack_fd, to) + || lseek(pack_data->pack_fd, to, SEEK_SET) != to) + die_errno("cannot truncate pack to skip duplicate"); + pack_size = to; + + /* yes this is a layering violation */ + pack_file->total = to; + pack_file->offset = 0; + pack_file->ctx = *ctx; +} + +static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) +{ + size_t in_sz = 64 * 1024, out_sz = 64 * 1024; + unsigned char *in_buf = xmalloc(in_sz); + unsigned char *out_buf = xmalloc(out_sz); + struct object_entry *e; + unsigned char sha1[20]; + unsigned long hdrlen; + off_t offset; + git_SHA_CTX c; + git_SHA_CTX pack_file_ctx; + z_stream s; + int status = Z_OK; + + /* Determine if we should auto-checkpoint. */ + if ((max_packsize && (pack_size + 60 + len) > max_packsize) + || (pack_size + 60 + len) < pack_size) + cycle_packfile(); + + offset = pack_size; + + /* preserve the pack_file SHA1 ctx in case we have to truncate later */ + sha1flush(pack_file); + pack_file_ctx = pack_file->ctx; + + hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1; + if (out_sz <= hdrlen) + die("impossibly large object header"); + + git_SHA1_Init(&c); + git_SHA1_Update(&c, out_buf, hdrlen); + + crc32_begin(pack_file); + + memset(&s, 0, sizeof(s)); + deflateInit(&s, pack_compression_level); + + hdrlen = encode_header(OBJ_BLOB, len, out_buf); + if (out_sz <= hdrlen) + die("impossibly large object header"); + + s.next_out = out_buf + hdrlen; + s.avail_out = out_sz - hdrlen; + + while (status != Z_STREAM_END) { + if (0 < len && !s.avail_in) { + size_t cnt = in_sz < len ? in_sz : (size_t)len; + size_t n = fread(in_buf, 1, cnt, stdin); + if (!n && feof(stdin)) + die("EOF in data (%" PRIuMAX " bytes remaining)", len); + + git_SHA1_Update(&c, in_buf, n); + s.next_in = in_buf; + s.avail_in = n; + len -= n; + } + + status = deflate(&s, len ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + size_t n = s.next_out - out_buf; + sha1write(pack_file, out_buf, n); + pack_size += n; + s.next_out = out_buf; + s.avail_out = out_sz; + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + deflateEnd(&s); + git_SHA1_Final(sha1, &c); + + if (sha1out) + hashcpy(sha1out, sha1); + + e = insert_object(sha1); + + if (mark) + insert_mark(mark, e); + + if (e->idx.offset) { + duplicate_count_by_type[OBJ_BLOB]++; + truncate_pack(offset, &pack_file_ctx); + + } else if (find_sha1_pack(sha1, packed_git)) { + e->type = OBJ_BLOB; + e->pack_id = MAX_PACK_ID; + e->idx.offset = 1; /* just not zero! */ + duplicate_count_by_type[OBJ_BLOB]++; + truncate_pack(offset, &pack_file_ctx); + + } else { + e->depth = 0; + e->type = OBJ_BLOB; + e->pack_id = pack_id; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); + object_count++; + object_count_by_type[OBJ_BLOB]++; + } + + free(in_buf); + free(out_buf); +} + /* All calls must be guarded by find_object() or find_mark() to * ensure the 'struct object_entry' passed was written by this * process instance. We unpack the entry by the offset, avoiding @@ -1204,6 +1297,7 @@ static void *gfi_unpack_entry( * the newly written data. */ close_pack_windows(p); + sha1flush(pack_file); /* We have to offer 20 bytes additional on the end of * the packfile as the core unpacker code assumes the @@ -1213,7 +1307,7 @@ static void *gfi_unpack_entry( */ p->pack_size = pack_size + 20; } - return unpack_entry(p, oe->offset, &type, sizep); + return unpack_entry(p, oe->idx.offset, &type, sizep); } static const char *get_mode(const char *str, uint16_t *modep) @@ -1344,7 +1438,7 @@ static void store_tree(struct tree_entry *root) if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) { mktree(t, 0, &old_tree); lo.data = old_tree; - lo.offset = le->offset; + lo.offset = le->idx.offset; lo.depth = t->delta_depth; } @@ -1602,7 +1696,7 @@ static void dump_marks_helper(FILE *f, for (k = 0; k < 1024; k++) { if (m->data.marked[k]) fprintf(f, ":%" PRIuMAX " %s\n", base + k, - sha1_to_hex(m->data.marked[k]->sha1)); + sha1_to_hex(m->data.marked[k]->idx.sha1)); } } } @@ -1685,7 +1779,7 @@ static void read_marks(void) e = insert_object(sha1); e->type = type; e->pack_id = MAX_PACK_ID; - e->offset = 1; /* just not zero! */ + e->idx.offset = 1; /* just not zero! */ } insert_mark(mark, e); } @@ -1757,7 +1851,7 @@ static void parse_mark(void) next_mark = 0; } -static void parse_data(struct strbuf *sb) +static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res) { strbuf_reset(sb); @@ -1781,9 +1875,15 @@ static void parse_data(struct strbuf *sb) free(term); } else { - size_t n = 0, length; + uintmax_t len = strtoumax(command_buf.buf + 5, NULL, 10); + size_t n = 0, length = (size_t)len; - length = strtoul(command_buf.buf + 5, NULL, 10); + if (limit && limit < len) { + *len_res = len; + return 0; + } + if (length < len) + die("data is too large to use in this context"); while (n < length) { size_t s = strbuf_fread(sb, length - n, stdin); @@ -1795,6 +1895,7 @@ static void parse_data(struct strbuf *sb) } skip_optional_lf(); + return 1; } static int validate_raw_date(const char *src, char *result, int maxlen) @@ -1859,14 +1960,32 @@ static char *parse_ident(const char *buf) return ident; } -static void parse_new_blob(void) +static void parse_and_store_blob( + struct last_object *last, + unsigned char *sha1out, + uintmax_t mark) { static struct strbuf buf = STRBUF_INIT; + uintmax_t len; + + if (parse_data(&buf, big_file_threshold, &len)) + store_object(OBJ_BLOB, &buf, last, sha1out, mark); + else { + if (last) { + strbuf_release(&last->data); + last->offset = 0; + last->depth = 0; + } + stream_blob(len, sha1out, mark); + skip_optional_lf(); + } +} +static void parse_new_blob(void) +{ read_next_command(); parse_mark(); - parse_data(&buf); - store_object(OBJ_BLOB, &buf, &last_blob, NULL, next_mark); + parse_and_store_blob(&last_blob, NULL, next_mark); } static void unload_one_branch(void) @@ -2045,7 +2164,7 @@ static void file_change_m(struct branch *b) if (*p == ':') { char *x; oe = find_mark(strtoumax(p + 1, &x, 10)); - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); p = x; } else if (!prefixcmp(p, "inline")) { inline_data = 1; @@ -2080,15 +2199,12 @@ static void file_change_m(struct branch *b) * another repository. */ } else if (inline_data) { - static struct strbuf buf = STRBUF_INIT; - if (p != uq.buf) { strbuf_addstr(&uq, p); p = uq.buf; } read_next_command(); - parse_data(&buf); - store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0); + parse_and_store_blob(&last_blob, sha1, 0); } else if (oe) { if (oe->type != OBJ_BLOB) die("Not a blob (actually a %s): %s", @@ -2181,7 +2297,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) if (*p == ':') { char *x; oe = find_mark(strtoumax(p + 1, &x, 10)); - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); p = x; } else if (!prefixcmp(p, "inline")) { inline_data = 1; @@ -2204,7 +2320,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) struct object_entry *commit_oe = find_mark(commit_mark); if (commit_oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", commit_mark); - hashcpy(commit_sha1, commit_oe->sha1); + hashcpy(commit_sha1, commit_oe->idx.sha1); } else if (!get_sha1(p, commit_sha1)) { unsigned long size; char *buf = read_object_with_reference(commit_sha1, @@ -2216,15 +2332,12 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) die("Invalid ref name or SHA1 expression: %s", p); if (inline_data) { - static struct strbuf buf = STRBUF_INIT; - if (p != uq.buf) { strbuf_addstr(&uq, p); p = uq.buf; } read_next_command(); - parse_data(&buf); - store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0); + parse_and_store_blob(&last_blob, sha1, 0); } else if (oe) { if (oe->type != OBJ_BLOB) die("Not a blob (actually a %s): %s", @@ -2314,7 +2427,7 @@ static int parse_from(struct branch *b) struct object_entry *oe = find_mark(idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); - hashcpy(b->sha1, oe->sha1); + hashcpy(b->sha1, oe->idx.sha1); if (oe->pack_id != MAX_PACK_ID) { unsigned long size; char *buf = gfi_unpack_entry(oe, &size); @@ -2349,7 +2462,7 @@ static struct hash_list *parse_merge(unsigned int *count) struct object_entry *oe = find_mark(idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); - hashcpy(n->sha1, oe->sha1); + hashcpy(n->sha1, oe->idx.sha1); } else if (!get_sha1(from, n->sha1)) { unsigned long size; char *buf = read_object_with_reference(n->sha1, @@ -2401,7 +2514,7 @@ static void parse_new_commit(void) } if (!committer) die("Expected committer but didn't get one"); - parse_data(&msg); + parse_data(&msg, 0, NULL); read_next_command(); parse_from(b); merge_list = parse_merge(&merge_count); @@ -2507,7 +2620,7 @@ static void parse_new_tag(void) from_mark = strtoumax(from + 1, NULL, 10); oe = find_mark(from_mark); type = oe->type; - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); } else if (!get_sha1(from, sha1)) { unsigned long size; char *buf; @@ -2528,7 +2641,7 @@ static void parse_new_tag(void) tagger = NULL; /* tag payload/message */ - parse_data(&msg); + parse_data(&msg, 0, NULL); /* build the tag object */ strbuf_reset(&new_data); @@ -2632,11 +2745,6 @@ static void option_date_format(const char *fmt) die("unknown --date-format argument %s", fmt); } -static void option_max_pack_size(const char *packsize) -{ - max_packsize = strtoumax(packsize, NULL, 0) * 1024 * 1024; -} - static void option_depth(const char *depth) { max_depth = strtoul(depth, NULL, 0); @@ -2666,7 +2774,22 @@ static void option_export_pack_edges(const char *edges) static int parse_one_option(const char *option) { if (!prefixcmp(option, "max-pack-size=")) { - option_max_pack_size(option + 14); + unsigned long v; + if (!git_parse_ulong(option + 14, &v)) + return 0; + if (v < 8192) { + warning("max-pack-size is now in bytes, assuming --max-pack-size=%lum", v); + v *= 1024 * 1024; + } else if (v < 1024 * 1024) { + warning("minimum max-pack-size is 1 MiB"); + v = 1024 * 1024; + } + max_packsize = v; + } else if (!prefixcmp(option, "big-file-threshold=")) { + unsigned long v; + if (!git_parse_ulong(option + 19, &v)) + return 0; + big_file_threshold = v; } else if (!prefixcmp(option, "depth=")) { option_depth(option + 6); } else if (!prefixcmp(option, "active-branches=")) { @@ -2749,11 +2872,26 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_compression_seen = 1; return 0; } + if (!strcmp(k, "pack.indexversion")) { + pack_idx_default_version = git_config_int(k, v); + if (pack_idx_default_version > 2) + die("bad pack.indexversion=%"PRIu32, + pack_idx_default_version); + return 0; + } + if (!strcmp(k, "pack.packsizelimit")) { + max_packsize = git_config_ulong(k, v); + return 0; + } + if (!strcmp(k, "core.bigfilethreshold")) { + long n = git_config_int(k, v); + big_file_threshold = 0 < n ? n : 0; + } return git_default_config(k, v, cb); } static const char fast_import_usage[] = -"git fast-import [--date-format=f] [--max-pack-size=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file]"; +"git fast-import [--date-format=f] [--max-pack-size=n] [--big-file-threshold=n] [--depth=n] [--active-branches=n] [--export-marks=marks.file]"; static void parse_argv(void) {