X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=blobdiff_plain;f=convert.c;h=01de9a84c21b31a0120065a32a386f27321cdf7b;hb=a33fb40fe477fa1a9ce882230415f19378959390;hp=491e7141b4ea29b3cf754cbaf2656a0c3ca8c46c;hpb=a07b10c8f930e88386d3b7424f25190af554275e;p=git.git diff --git a/convert.c b/convert.c index 491e7141b..01de9a84c 100644 --- a/convert.c +++ b/convert.c @@ -8,13 +8,17 @@ * This should use the pathname to decide on whether it wants to do some * more interesting conversions (automatic gzip/unzip, general format * conversions etc etc), but by default it just does automatic CRLF<->LF - * translation when the "auto_crlf" option is set. + * translation when the "text" attribute or "auto_crlf" option is set. */ -#define CRLF_GUESS (-1) -#define CRLF_BINARY 0 -#define CRLF_TEXT 1 -#define CRLF_INPUT 2 +enum action { + CRLF_GUESS = -1, + CRLF_BINARY = 0, + CRLF_TEXT, + CRLF_INPUT, + CRLF_CRLF, + CRLF_AUTO, +}; struct text_stat { /* NUL, CR, LF and CRLF counts */ @@ -89,49 +93,112 @@ static int is_binary(unsigned long size, struct text_stat *stats) return 0; } -static void check_safe_crlf(const char *path, int action, +static enum eol determine_output_conversion(enum action action) +{ + switch (action) { + case CRLF_BINARY: + return EOL_UNSET; + case CRLF_CRLF: + return EOL_CRLF; + case CRLF_INPUT: + return EOL_LF; + case CRLF_GUESS: + if (!auto_crlf) + return EOL_UNSET; + /* fall through */ + case CRLF_TEXT: + case CRLF_AUTO: + if (auto_crlf == AUTO_CRLF_TRUE) + return EOL_CRLF; + else if (auto_crlf == AUTO_CRLF_INPUT) + return EOL_LF; + else if (eol == EOL_UNSET) + return EOL_NATIVE; + } + return eol; +} + +static void check_safe_crlf(const char *path, enum action action, struct text_stat *stats, enum safe_crlf checksafe) { if (!checksafe) return; - if (action == CRLF_INPUT || auto_crlf <= 0) { + if (determine_output_conversion(action) == EOL_LF) { /* * CRLFs would not be restored by checkout: * check if we'd remove CRLFs */ if (stats->crlf) { if (checksafe == SAFE_CRLF_WARN) - warning("CRLF will be replaced by LF in %s.", path); + warning("CRLF will be replaced by LF in %s.\nThe file will have its original line endings in your working directory.", path); else /* i.e. SAFE_CRLF_FAIL */ die("CRLF would be replaced by LF in %s.", path); } - } else if (auto_crlf > 0) { + } else if (determine_output_conversion(action) == EOL_CRLF) { /* * CRLFs would be added by checkout: * check if we have "naked" LFs */ if (stats->lf != stats->crlf) { if (checksafe == SAFE_CRLF_WARN) - warning("LF will be replaced by CRLF in %s", path); + warning("LF will be replaced by CRLF in %s.\nThe file will have its original line endings in your working directory.", path); else /* i.e. SAFE_CRLF_FAIL */ die("LF would be replaced by CRLF in %s", path); } } } +static int has_cr_in_index(const char *path) +{ + int pos, len; + unsigned long sz; + enum object_type type; + void *data; + int has_cr; + struct index_state *istate = &the_index; + + len = strlen(path); + pos = index_name_pos(istate, path, len); + if (pos < 0) { + /* + * We might be in the middle of a merge, in which + * case we would read stage #2 (ours). + */ + int i; + for (i = -pos - 1; + (pos < 0 && i < istate->cache_nr && + !strcmp(istate->cache[i]->name, path)); + i++) + if (ce_stage(istate->cache[i]) == 2) + pos = i; + } + if (pos < 0) + return 0; + data = read_sha1_file(istate->cache[pos]->sha1, &type, &sz); + if (!data || type != OBJ_BLOB) { + free(data); + return 0; + } + + has_cr = memchr(data, '\r', sz) != NULL; + free(data); + return has_cr; +} + static int crlf_to_git(const char *path, const char *src, size_t len, - struct strbuf *buf, int action, enum safe_crlf checksafe) + struct strbuf *buf, enum action action, enum safe_crlf checksafe) { struct text_stat stats; char *dst; - if ((action == CRLF_BINARY) || !auto_crlf || !len) + if (action == CRLF_BINARY || + (action == CRLF_GUESS && auto_crlf == AUTO_CRLF_FALSE) || !len) return 0; gather_stats(src, len, &stats); - if (action == CRLF_GUESS) { + if (action == CRLF_AUTO || action == CRLF_GUESS) { /* * We're currently not going to even try to convert stuff * that has bare CR characters. Does anybody do that crazy @@ -145,6 +212,15 @@ static int crlf_to_git(const char *path, const char *src, size_t len, */ if (is_binary(len, &stats)) return 0; + + if (action == CRLF_GUESS) { + /* + * If the file in the index has any CR in it, do not convert. + * This is the new safer autocrlf handling. + */ + if (has_cr_in_index(path)) + return 0; + } } check_safe_crlf(path, action, &stats, checksafe); @@ -157,7 +233,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len, if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); dst = buf->buf; - if (action == CRLF_GUESS) { + if (action == CRLF_AUTO || action == CRLF_GUESS) { /* * If we guessed, we already know we rejected a file with * lone CR, and we can strip a CR without looking at what @@ -180,16 +256,12 @@ static int crlf_to_git(const char *path, const char *src, size_t len, } static int crlf_to_worktree(const char *path, const char *src, size_t len, - struct strbuf *buf, int action) + struct strbuf *buf, enum action action) { char *to_free = NULL; struct text_stat stats; - if ((action == CRLF_BINARY) || (action == CRLF_INPUT) || - auto_crlf <= 0) - return 0; - - if (!len) + if (!len || determine_output_conversion(action) != EOL_CRLF) return 0; gather_stats(src, len, &stats); @@ -202,7 +274,14 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, if (stats.lf == stats.crlf) return 0; - if (action == CRLF_GUESS) { + if (action == CRLF_AUTO || action == CRLF_GUESS) { + if (action == CRLF_GUESS) { + /* If we have any CR or CRLF line endings, we do not touch it */ + /* This is the new safer autocrlf-handling */ + if (stats.cr > 0 || stats.crlf > 0) + return 0; + } + /* If we have any bare CR characters, we're not going to touch it */ if (stats.cr != stats.crlf) return 0; @@ -241,7 +320,7 @@ struct filter_params { const char *cmd; }; -static int filter_buffer(int fd, void *data) +static int filter_buffer(int in, int out, void *data) { /* * Spawn cmd and feed the buffer contents through its stdin. @@ -249,12 +328,15 @@ static int filter_buffer(int fd, void *data) struct child_process child_process; struct filter_params *params = (struct filter_params *)data; int write_err, status; - const char *argv[] = { "sh", "-c", params->cmd, NULL }; + const char *argv[] = { NULL, NULL }; + + argv[0] = params->cmd; memset(&child_process, 0, sizeof(child_process)); child_process.argv = argv; + child_process.use_shell = 1; child_process.in = -1; - child_process.out = fd; + child_process.out = out; if (start_command(&child_process)) return error("cannot fork to run external filter %s", params->cmd); @@ -291,6 +373,7 @@ static int apply_filter(const char *path, const char *src, size_t len, memset(&async, 0, sizeof(async)); async.proc = filter_buffer; async.data = ¶ms; + async.out = -1; params.src = src; params.size = len; params.cmd = cmd; @@ -372,20 +455,26 @@ static int read_convert_config(const char *var, const char *value, void *cb) static void setup_convert_check(struct git_attr_check *check) { + static struct git_attr *attr_text; static struct git_attr *attr_crlf; + static struct git_attr *attr_eol; static struct git_attr *attr_ident; static struct git_attr *attr_filter; - if (!attr_crlf) { - attr_crlf = git_attr("crlf", 4); - attr_ident = git_attr("ident", 5); - attr_filter = git_attr("filter", 6); + if (!attr_text) { + attr_text = git_attr("text"); + attr_crlf = git_attr("crlf"); + attr_eol = git_attr("eol"); + attr_ident = git_attr("ident"); + attr_filter = git_attr("filter"); user_convert_tail = &user_convert; git_config(read_convert_config, NULL); } check[0].attr = attr_crlf; check[1].attr = attr_ident; check[2].attr = attr_filter; + check[3].attr = attr_eol; + check[4].attr = attr_text; } static int count_ident(const char *cp, unsigned long size) @@ -423,6 +512,8 @@ static int count_ident(const char *cp, unsigned long size) cnt++; break; } + if (ch == '\n') + break; } } return cnt; @@ -453,6 +544,11 @@ static int ident_to_git(const char *path, const char *src, size_t len, dollar = memchr(src + 3, '$', len - 3); if (!dollar) break; + if (memchr(src + 3, '\n', dollar - src - 3)) { + /* Line break before the next dollar. */ + continue; + } + memcpy(dst, "Id$", 3); dst += 3; len -= dollar + 1 - src; @@ -468,7 +564,7 @@ static int ident_to_worktree(const char *path, const char *src, size_t len, struct strbuf *buf, int ident) { unsigned char sha1[20]; - char *to_free = NULL, *dollar; + char *to_free = NULL, *dollar, *spc; int cnt; if (!ident) @@ -504,7 +600,10 @@ static int ident_to_worktree(const char *path, const char *src, size_t len, } else if (src[2] == ':') { /* * It's possible that an expanded Id has crept its way into the - * repository, we cope with that by stripping the expansion out + * repository, we cope with that by stripping the expansion out. + * This is probably not a good idea, since it will cause changes + * on checkout, which won't go away by stash, but let's keep it + * for git-style ids. */ dollar = memchr(src + 3, '$', len - 3); if (!dollar) { @@ -512,6 +611,20 @@ static int ident_to_worktree(const char *path, const char *src, size_t len, break; } + if (memchr(src + 3, '\n', dollar - src - 3)) { + /* Line break before the next dollar. */ + continue; + } + + spc = memchr(src + 4, ' ', dollar - src - 4); + if (spc && spc < dollar-1) { + /* There are spaces in unexpected places. + * This is probably an id from some other + * versioning system. Keep it for now. + */ + continue; + } + len -= dollar + 1 - src; src = dollar + 1; } else { @@ -542,9 +655,24 @@ static int git_path_check_crlf(const char *path, struct git_attr_check *check) ; else if (!strcmp(value, "input")) return CRLF_INPUT; + else if (!strcmp(value, "auto")) + return CRLF_AUTO; return CRLF_GUESS; } +static int git_path_check_eol(const char *path, struct git_attr_check *check) +{ + const char *value = check->value; + + if (ATTR_UNSET(value)) + ; + else if (!strcmp(value, "lf")) + return EOL_LF; + else if (!strcmp(value, "crlf")) + return EOL_CRLF; + return EOL_UNSET; +} + static struct convert_driver *git_path_check_convert(const char *path, struct git_attr_check *check) { @@ -566,20 +694,35 @@ static int git_path_check_ident(const char *path, struct git_attr_check *check) return !!ATTR_TRUE(value); } +static enum action determine_action(enum action text_attr, enum eol eol_attr) +{ + if (text_attr == CRLF_BINARY) + return CRLF_BINARY; + if (eol_attr == EOL_LF) + return CRLF_INPUT; + if (eol_attr == EOL_CRLF) + return CRLF_CRLF; + return text_attr; +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { - struct git_attr_check check[3]; - int crlf = CRLF_GUESS; + struct git_attr_check check[5]; + enum action action = CRLF_GUESS; + enum eol eol_attr = EOL_UNSET; int ident = 0, ret = 0; const char *filter = NULL; setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { struct convert_driver *drv; - crlf = git_path_check_crlf(path, check + 0); + action = git_path_check_crlf(path, check + 4); + if (action == CRLF_GUESS) + action = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); drv = git_path_check_convert(path, check + 2); + eol_attr = git_path_check_eol(path, check + 3); if (drv && drv->clean) filter = drv->clean; } @@ -589,7 +732,8 @@ int convert_to_git(const char *path, const char *src, size_t len, src = dst->buf; len = dst->len; } - ret |= crlf_to_git(path, src, len, dst, crlf, checksafe); + action = determine_action(action, eol_attr); + ret |= crlf_to_git(path, src, len, dst, action, checksafe); if (ret) { src = dst->buf; len = dst->len; @@ -597,19 +741,25 @@ int convert_to_git(const char *path, const char *src, size_t len, return ret | ident_to_git(path, src, len, dst, ident); } -int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +static int convert_to_working_tree_internal(const char *path, const char *src, + size_t len, struct strbuf *dst, + int normalizing) { - struct git_attr_check check[3]; - int crlf = CRLF_GUESS; + struct git_attr_check check[5]; + enum action action = CRLF_GUESS; + enum eol eol_attr = EOL_UNSET; int ident = 0, ret = 0; const char *filter = NULL; setup_convert_check(check); if (!git_checkattr(path, ARRAY_SIZE(check), check)) { struct convert_driver *drv; - crlf = git_path_check_crlf(path, check + 0); + action = git_path_check_crlf(path, check + 4); + if (action == CRLF_GUESS) + action = git_path_check_crlf(path, check + 0); ident = git_path_check_ident(path, check + 1); drv = git_path_check_convert(path, check + 2); + eol_attr = git_path_check_eol(path, check + 3); if (drv && drv->smudge) filter = drv->smudge; } @@ -619,10 +769,32 @@ int convert_to_working_tree(const char *path, const char *src, size_t len, struc src = dst->buf; len = dst->len; } - ret |= crlf_to_worktree(path, src, len, dst, crlf); + /* + * CRLF conversion can be skipped if normalizing, unless there + * is a smudge filter. The filter might expect CRLFs. + */ + if (filter || !normalizing) { + action = determine_action(action, eol_attr); + ret |= crlf_to_worktree(path, src, len, dst, action); + if (ret) { + src = dst->buf; + len = dst->len; + } + } + return ret | apply_filter(path, src, len, dst, filter); +} + +int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + return convert_to_working_tree_internal(path, src, len, dst, 0); +} + +int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst) +{ + int ret = convert_to_working_tree_internal(path, src, len, dst, 1); if (ret) { src = dst->buf; len = dst->len; } - return ret | apply_filter(path, src, len, dst, filter); + return ret | convert_to_git(path, src, len, dst, 0); }