]> asedeno.scripts.mit.edu Git - git.git/blob - fast-import.c
Cleaned up memory allocation for object_entry structs.
[git.git] / fast-import.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "delta.h"
6 #include "pack.h"
7 #include "csum-file.h"
8
/*
 * One object seen during this import run.  Entries live inside
 * object_entry_block storage and are chained per hash bucket in
 * object_table through `next`.
 */
struct object_entry {
	struct object_entry *next;	/* next entry in the same bucket, or 0 */
	unsigned long offset;		/* position in the pack; 0 until written */
	unsigned char sha1[20];		/* object name */
};
15
16 struct object_entry_block
17 {
18         struct object_entry_block *next_block;
19         struct object_entry *next_free;
20         struct object_entry *end;
21         struct object_entry entries[0];
22 };
23
/* Longest delta chain write_blob() will build before storing a full blob. */
static int max_depth = 10;

/* Statistics reported at the end of the run. */
static unsigned long alloc_count;	/* object_entry slots allocated */
static unsigned long object_count;	/* objects written to the pack */
static unsigned long duplicate_count;	/* blobs skipped as already seen */

/* Output pack: current write offset and file descriptor. */
static unsigned long packoff;
static int packfd;

/* The most recently written blob, kept as the base for the next delta. */
static int current_depth;
static void *lastdat;
static unsigned long lastdatlen;
static unsigned char lastsha1[20];

/* Checksum of the finished pack, also stored in the index. */
static unsigned char packsha1[20];

/* Hash table keyed on the first two SHA-1 bytes, and its entry storage. */
struct object_entry *object_table[1 << 16];
struct object_entry_block *blocks;
37
38 static void alloc_objects(int cnt)
39 {
40         struct object_entry_block *b;
41
42         b = xmalloc(sizeof(struct object_entry_block)
43                 + cnt * sizeof(struct object_entry));
44         b->next_block = blocks;
45         b->next_free = b->entries;
46         b->end = b->entries + cnt;
47         blocks = b;
48         alloc_count += cnt;
49 }
50
51 static struct object_entry* new_object(unsigned char *sha1)
52 {
53         struct object_entry *e;
54
55         if (blocks->next_free == blocks->end)
56                 alloc_objects(1000);
57
58         e = blocks->next_free++;
59         memcpy(e->sha1, sha1, sizeof(e->sha1));
60         return e;
61 }
62
63 static struct object_entry* insert_object(unsigned char *sha1)
64 {
65         unsigned int h = sha1[0] << 8 | sha1[1];
66         struct object_entry *e = object_table[h];
67         struct object_entry *p = 0;
68
69         while (e) {
70                 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
71                         return e;
72                 p = e;
73                 e = e->next;
74         }
75
76         e = new_object(sha1);
77         e->next = 0;
78         e->offset = 0;
79         if (p)
80                 p->next = e;
81         else
82                 object_table[h] = e;
83         return e;
84 }
85
/*
 * Read until `length` bytes have arrived, the source reports end of
 * input, or xread() fails.  Returns the number of bytes read (short
 * only at end of input) or the negative value xread() returned.
 */
static ssize_t yread(int fd, void *buffer, size_t length)
{
	char *dst = buffer;
	ssize_t done = 0;

	while (done < length) {
		ssize_t got = xread(fd, dst + done, length - done);
		if (got < 0)
			return got;
		if (!got)
			break;
		done += got;
	}
	return done;
}
101
/*
 * Write until all `length` bytes are out, xwrite() accepts nothing
 * more, or xwrite() fails.  Returns the number of bytes written or the
 * negative value xwrite() returned.
 */
static ssize_t ywrite(int fd, void *buffer, size_t length)
{
	char *src = buffer;
	ssize_t done = 0;

	while (done < length) {
		ssize_t put = xwrite(fd, src + done, length - done);
		if (put < 0)
			return put;
		if (!put)
			break;
		done += put;
	}
	return done;
}
117
118 static unsigned long encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
119 {
120         int n = 1;
121         unsigned char c;
122
123         if (type < OBJ_COMMIT || type > OBJ_DELTA)
124                 die("bad type %d", type);
125
126         c = (type << 4) | (size & 15);
127         size >>= 4;
128         while (size) {
129                 *hdr++ = c | 0x80;
130                 c = size & 0x7f;
131                 size >>= 7;
132                 n++;
133         }
134         *hdr = c;
135         return n;
136 }
137
138 static void write_blob(void *dat, unsigned long datlen)
139 {
140         z_stream s;
141         void *out, *delta;
142         unsigned char hdr[64];
143         unsigned long hdrlen, deltalen;
144
145         if (lastdat && current_depth < max_depth) {
146                 delta = diff_delta(lastdat, lastdatlen,
147                         dat, datlen,
148                         &deltalen, 0);
149         } else
150                 delta = 0;
151
152         memset(&s, 0, sizeof(s));
153         deflateInit(&s, zlib_compression_level);
154
155         if (delta) {
156                 current_depth++;
157                 s.next_in = delta;
158                 s.avail_in = deltalen;
159                 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
160                 if (ywrite(packfd, hdr, hdrlen) != hdrlen)
161                         die("Can't write object header: %s", strerror(errno));
162                 if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1))
163                         die("Can't write object base: %s", strerror(errno));
164                 packoff += hdrlen + sizeof(lastsha1);
165         } else {
166                 current_depth = 0;
167                 s.next_in = dat;
168                 s.avail_in = datlen;
169                 hdrlen = encode_header(OBJ_BLOB, datlen, hdr);
170                 if (ywrite(packfd, hdr, hdrlen) != hdrlen)
171                         die("Can't write object header: %s", strerror(errno));
172                 packoff += hdrlen;
173         }
174
175         s.avail_out = deflateBound(&s, s.avail_in);
176         s.next_out = out = xmalloc(s.avail_out);
177         while (deflate(&s, Z_FINISH) == Z_OK)
178                 /* nothing */;
179         deflateEnd(&s);
180
181         if (ywrite(packfd, out, s.total_out) != s.total_out)
182                 die("Failed writing compressed data %s", strerror(errno));
183         packoff += s.total_out;
184
185         free(out);
186         if (delta)
187                 free(delta);
188 }
189
190 static void init_pack_header()
191 {
192         const char* magic = "PACK";
193         unsigned long version = 2;
194         unsigned long zero = 0;
195
196         version = htonl(version);
197
198         if (ywrite(packfd, (char*)magic, 4) != 4)
199                 die("Can't write pack magic: %s", strerror(errno));
200         if (ywrite(packfd, &version, 4) != 4)
201                 die("Can't write pack version: %s", strerror(errno));
202         if (ywrite(packfd, &zero, 4) != 4)
203                 die("Can't write 0 object count: %s", strerror(errno));
204         packoff = 4 * 3;
205 }
206
207 static void fixup_header_footer()
208 {
209         SHA_CTX c;
210         char hdr[8];
211         unsigned long cnt;
212         char *buf;
213         size_t n;
214
215         if (lseek(packfd, 0, SEEK_SET) != 0)
216                 die("Failed seeking to start: %s", strerror(errno));
217
218         SHA1_Init(&c);
219         if (yread(packfd, hdr, 8) != 8)
220                 die("Failed reading header: %s", strerror(errno));
221         SHA1_Update(&c, hdr, 8);
222
223         cnt = htonl(object_count);
224         SHA1_Update(&c, &cnt, 4);
225         if (ywrite(packfd, &cnt, 4) != 4)
226                 die("Failed writing object count: %s", strerror(errno));
227
228         buf = xmalloc(128 * 1024);
229         for (;;) {
230                 n = xread(packfd, buf, 128 * 1024);
231                 if (n <= 0)
232                         break;
233                 SHA1_Update(&c, buf, n);
234         }
235         free(buf);
236
237         SHA1_Final(packsha1, &c);
238         if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1))
239                 die("Failed writing pack checksum: %s", strerror(errno));
240 }
241
242 static int oecmp (const void *_a, const void *_b)
243 {
244         struct object_entry *a = *((struct object_entry**)_a);
245         struct object_entry *b = *((struct object_entry**)_b);
246         return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
247 }
248
249 static void write_index(const char *idx_name)
250 {
251         struct sha1file *f;
252         struct object_entry **idx, **c, **last;
253         struct object_entry *e;
254         struct object_entry_block *o;
255         unsigned int array[256];
256         int i;
257
258         /* Build the sorted table of object IDs. */
259         idx = xmalloc(object_count * sizeof(struct object_entry*));
260         c = idx;
261         for (o = blocks; o; o = o->next_block)
262                 for (e = o->entries; e != o->next_free; e++)
263                         *c++ = e;
264         last = idx + object_count;
265         qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
266
267         /* Generate the fan-out array. */
268         c = idx;
269         for (i = 0; i < 256; i++) {
270                 struct object_entry **next = c;;
271                 while (next < last) {
272                         if ((*next)->sha1[0] != i)
273                                 break;
274                         next++;
275                 }
276                 array[i] = htonl(next - idx);
277                 c = next;
278         }
279
280         f = sha1create("%s", idx_name);
281         sha1write(f, array, 256 * sizeof(int));
282         for (c = idx; c != last; c++) {
283                 unsigned int offset = htonl((*c)->offset);
284                 sha1write(f, &offset, 4);
285                 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
286         }
287         sha1write(f, packsha1, sizeof(packsha1));
288         sha1close(f, NULL, 1);
289         free(idx);
290 }
291
292 int main(int argc, const char **argv)
293 {
294         const char *base_name = argv[1];
295         int est_obj_cnt = atoi(argv[2]);
296         char *pack_name;
297         char *idx_name;
298
299         pack_name = xmalloc(strlen(base_name) + 6);
300         sprintf(pack_name, "%s.pack", base_name);
301         idx_name = xmalloc(strlen(base_name) + 5);
302         sprintf(idx_name, "%s.idx", base_name);
303
304         packfd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
305         if (packfd < 0)
306                 die("Can't create pack file %s: %s", pack_name, strerror(errno));
307
308         alloc_objects(est_obj_cnt);
309         init_pack_header();
310         for (;;) {
311                 unsigned long datlen;
312                 int hdrlen;
313                 void *dat;
314                 char hdr[128];
315                 unsigned char sha1[20];
316                 SHA_CTX c;
317                 struct object_entry *e;
318
319                 if (yread(0, &datlen, 4) != 4)
320
321                         break;
322
323                 dat = xmalloc(datlen);
324                 if (yread(0, dat, datlen) != datlen)
325                         break;
326
327                 hdrlen = sprintf(hdr, "blob %lu", datlen) + 1;
328                 SHA1_Init(&c);
329                 SHA1_Update(&c, hdr, hdrlen);
330                 SHA1_Update(&c, dat, datlen);
331                 SHA1_Final(sha1, &c);
332
333                 e = insert_object(sha1);
334                 if (!e->offset) {
335                         e->offset = packoff;
336                         write_blob(dat, datlen);
337                         object_count++;
338
339                         if (lastdat)
340                                 free(lastdat);
341                         lastdat = dat;
342                         lastdatlen = datlen;
343                         memcpy(lastsha1, sha1, sizeof(sha1));
344                 } else {
345                         duplicate_count++;
346                         free(dat);
347                 }
348         }
349         fixup_header_footer();
350         close(packfd);
351         write_index(idx_name);
352
353         fprintf(stderr, "%lu objects, %lu duplicates, %lu allocated (%lu overflow)\n",
354                 object_count, duplicate_count, alloc_count, alloc_count - est_obj_cnt);
355
356         return 0;
357 }