]> asedeno.scripts.mit.edu Git - PuTTY.git/blob - winutils.c
Revamp of command-line handling. Most command line options should
[PuTTY.git] / winutils.c
1 /*
2  * winutils.c: miscellaneous Windows utilities
3  */
4
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
7
/* Element count of a true array `x' (meaningless if `x' is a pointer). */
#define lenof(x) ( sizeof(x) / sizeof((x)[0]) )
9
#ifdef TESTMODE
/*
 * Definitions to allow this module to be compiled standalone for
 * testing: when TESTMODE is defined, smalloc is simply an alias for
 * the C library's malloc.
 */
#define smalloc malloc
#endif
14
/*
 * Split a complete command line into argc/argv, attempting to do
 * it exactly the same way Windows itself would do it (so that
 * console utilities, which receive argc and argv from Windows,
 * will have their command lines processed in the same way as GUI
 * utilities which get a whole command line and must break it
 * themselves).
 *
 * Does not modify the input command line (just in case).
 *
 * Parameters:
 *   cmdline  NUL-terminated command line to split.
 *   argc     if non-NULL, receives the number of words found.
 *   argv     if non-NULL, receives a freshly allocated vector holding
 *            that many word pointers followed by a terminating NULL.
 *
 * Ownership: every word lives in one single allocated buffer, whose
 * start is (*argv)[0] whenever at least one word was found. The
 * caller releases the result with free((*argv)[0]) followed by
 * free(*argv); when no words were found (*argv)[0] is NULL, so the
 * same sequence is still safe. If `argv' is NULL nothing is handed
 * back and everything is freed here.
 *
 * If memory cannot be allocated, *argc is set to 0 and *argv to NULL.
 */
void split_into_argv(const char *cmdline, int *argc, char ***argv)
{
    const char *p;
    char *outputline, *q;
    char **outputargv, **shrunk;
    int outputargc;
    size_t len;

    /*
     * At first glance the rules appeared to be:
     *
     *  - Single quotes are not special characters.
     *
     *  - Double quotes are removed, but within them spaces cease
     *    to be special.
     *
     *  - Backslashes are _only_ special when a sequence of them
     *    appear just before a double quote. In this situation,
     *    they are treated like C backslashes: so \" just gives a
     *    literal quote, \\" gives a literal backslash and then
     *    opens or closes a double-quoted segment, \\\" gives a
     *    literal backslash and then a literal quote, \\\\" gives
     *    two literal backslashes and then opens/closes a
     *    double-quoted segment, and so forth. Note that this
     *    behaviour is identical inside and outside double quotes.
     *
     *  - Two successive double quotes become one literal double
     *    quote, but only _inside_ a double-quoted segment.
     *    Outside, they just form an empty double-quoted segment
     *    (which may cause an empty argument word).
     *
     *  - That only leaves the interesting question of what happens
     *    when one or more backslashes precedes two or more double
     *    quotes, starting inside a double-quoted string. And the
     *    answer to that appears somewhat bizarre. Here I tabulate
     *    number of backslashes (across the top) against number of
     *    quotes (down the left), and indicate how many backslashes
     *    are output, how many quotes are output, and whether a
     *    quoted segment is open at the end of the sequence:
     *
     *                      backslashes
     *
     *               0         1      2      3      4
     *
     *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
     *            --------+-----------------------------
     *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
     *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
     *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
     *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
     *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
     *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
     *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
     *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
     *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
     *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
     *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
     *
     *
     *      [Test fragment was of the form "a\\\"""b c" d.]
     *
     * There is very weird mod-3 behaviour going on here in the
     * number of quotes, and it even applies when there aren't any
     * backslashes! How ghastly.
     *
     * With a bit of thought, this extremely odd diagram suddenly
     * coalesced itself into a coherent, if still ghastly, model of
     * how things work:
     *
     *  - As before, backslashes are only special when one or more
     *    of them appear contiguously before at least one double
     *    quote. In this situation the backslashes do exactly what
     *    you'd expect: each one quotes the next thing in front of
     *    it, so you end up with n/2 literal backslashes (if n is
     *    even) or (n-1)/2 literal backslashes and a literal quote
     *    (if n is odd). In the latter case the double quote
     *    character right after the backslashes is used up.
     *
     *  - After that, any remaining double quotes are processed. A
     *    string of contiguous unescaped double quotes has a mod-3
     *    behaviour:
     *
     *     * inside a quoted segment, a quote ends the segment.
     *     * _immediately_ after ending a quoted segment, a quote
     *       simply produces a literal quote.
     *     * otherwise, outside a quoted segment, a quote begins a
     *       quoted segment.
     *
     *    So, for example, if we started inside a quoted segment
     *    then two contiguous quotes would close the segment and
     *    produce a literal quote; three would close the segment,
     *    produce a literal quote, and open a new segment. If we
     *    started outside a quoted segment, then two contiguous
     *    quotes would open and then close a segment, producing no
     *    output (but potentially creating a zero-length argument);
     *    but three quotes would open and close a segment and then
     *    produce a literal quote.
     */

    /*
     * These sizes are guaranteed to be big enough:
     *
     *  - the output text never grows (every emitted character uses
     *    up at least one input character, and every word's NUL is
     *    paid for by the separator before the next word, or by the
     *    final +1);
     *
     *  - a word needs at least one input character plus a separator
     *    from its successor, so there are at most (len+1)/2 words;
     *    one further slot holds the terminating NULL.
     *
     * The vector is trimmed to its real size at the end. The text
     * buffer is left alone, because the word pointers point into it.
     */
    len = strlen(cmdline);
    outputline = malloc(len + 1);
    outputargv = malloc(sizeof *outputargv * ((len + 1) / 2 + 1));
    if (!outputline || !outputargv) {
        free(outputline);
        free(outputargv);
        if (argc) *argc = 0;
        if (argv) *argv = NULL;
        return;
    }

    p = cmdline; q = outputline; outputargc = 0;

    while (*p) {
        int quote;

        /*
         * Skip whitespace searching for start of argument. The cast
         * matters: handing a negative char to isspace() is undefined.
         */
        while (*p && isspace((unsigned char)*p)) p++;
        if (!*p) break;

        /* We have an argument; start it. */
        outputargv[outputargc++] = q;
        quote = 0;

        /* Copy data into the argument until it's finished. */
        while (*p) {
            if (!quote && isspace((unsigned char)*p))
                break;                 /* argument is finished */

            if (*p == '"' || *p == '\\') {
                /*
                 * We have a sequence of zero or more backslashes
                 * followed by a sequence of zero or more quotes.
                 * Count up how many of each, and then deal with
                 * them as appropriate.
                 */
                int i, slashes = 0, quotes = 0;
                while (*p == '\\') slashes++, p++;
                while (*p == '"') quotes++, p++;

                if (!quotes) {
                    /*
                     * Special case: if there are no quotes,
                     * slashes are not special at all, so just copy
                     * n slashes to the output string.
                     */
                    while (slashes--) *q++ = '\\';
                } else {
                    /* Slashes annihilate in pairs. */
                    while (slashes >= 2) slashes -= 2, *q++ = '\\';

                    /* One remaining slash takes out the first quote. */
                    if (slashes) quotes--, *q++ = '"';

                    if (quotes > 0) {
                        /* Outside a quote segment, a quote starts one. */
                        if (!quote) quotes--, quote = 1;

                        /* Now we produce (n+1)/3 literal quotes... */
                        for (i = 3; i <= quotes+1; i += 3) *q++ = '"';

                        /* ... and end in a quote segment iff 3 divides n. */
                        quote = (quotes % 3 == 0);
                    }
                }
            } else {
                *q++ = *p++;
            }
        }

        /* At the end of an argument, just append a trailing NUL. */
        *q++ = '\0';
    }

    /* Terminate the vector the way a real argv is terminated. */
    outputargv[outputargc] = NULL;

    if (!argv) {
        /* Nobody wants the words, so don't leak them. */
        free(outputline);
        free(outputargv);
    } else {
        /*
         * With no words, nothing points at the text buffer, so the
         * caller could never free it; release it here instead.
         */
        if (outputargc == 0)
            free(outputline);

        /*
         * Trim the vector. The size is never zero thanks to the NULL
         * slot, and a failed shrink simply keeps the larger block.
         */
        shrunk = realloc(outputargv,
                         sizeof *outputargv * ((size_t)outputargc + 1));
        if (shrunk)
            outputargv = shrunk;

        *argv = outputargv;
    }

    if (argc) *argc = outputargc;
}
198
199 #ifdef TESTMODE
200
/*
 * Table of test cases for split_into_argv(). Each entry pairs a raw
 * command line with the NULL-terminated list of words it is expected
 * to split into. The table is only used within this file, so it has
 * internal linkage.
 */
static const struct argv_test {
    const char *cmdline;               /* command line to be split */
    const char *argv[10];              /* expected words, then NULL */
} argv_tests[] = {
    /*
     * We generate this set of tests by invoking ourself with
     * `-generate'.
     */
    {"ab c\" d", {"ab", "c d", NULL}},
    {"a\"b c\" d", {"ab c", "d", NULL}},
    {"a\"\"b c\" d", {"ab", "c d", NULL}},
    {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
    {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
    {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
    {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"ab c\" d", {"ab c", "d", NULL}},
    {"\"a\"b c\" d", {"ab", "c d", NULL}},
    {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
    {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
    {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
    {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
};
300
301 int main(int argc, char **argv)
302 {
303     int i, j;
304
305     if (argc > 1) {
306         /*
307          * Generation of tests.
308          * 
309          * Given `-splat <args>', we print out a C-style
310          * representation of each argument (in the form "a", "b",
311          * NULL), backslash-escaping each backslash and double
312          * quote.
313          * 
314          * Given `-split <string>', we first doctor `string' by
315          * turning forward slashes into backslashes, single quotes
316          * into double quotes and underscores into spaces; and then
317          * we feed the resulting string to ourself with `-splat'.
318          * 
319          * Given `-generate', we concoct a variety of fun test
320          * cases, encode them in quote-safe form (mapping \, " and
321          * space to /, ' and _ respectively) and feed each one to
322          * `-split'.
323          */
324         if (!strcmp(argv[1], "-splat")) {
325             int i;
326             char *p;
327             for (i = 2; i < argc; i++) {
328                 putchar('"');
329                 for (p = argv[i]; *p; p++) {
330                     if (*p == '\\' || *p == '"')
331                         putchar('\\');
332                     putchar(*p);
333                 }
334                 printf("\", ");
335             }
336             printf("NULL");
337             return 0;
338         }
339
340         if (!strcmp(argv[1], "-split") && argc > 2) {
341             char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
342             char *p, *q;
343
344             q = str + sprintf(str, "%s -splat ", argv[0]);
345             printf("    {\"");
346             for (p = argv[2]; *p; p++, q++) {
347                 switch (*p) {
348                   case '/':  printf("\\\\"); *q = '\\'; break;
349                   case '\'': printf("\\\""); *q = '"';  break;
350                   case '_':  printf(" ");    *q = ' ';  break;
351                   default:   putchar(*p);    *q = *p;   break;
352                 }
353             }
354             *p = '\0';
355             printf("\", {");
356             fflush(stdout);
357
358             system(str);
359
360             printf("}},\n");
361
362             return 0;
363         }
364
365         if (!strcmp(argv[1], "-generate")) {
366             char *teststr, *p;
367             int i, initialquote, backslashes, quotes;
368
369             teststr = malloc(200 + strlen(argv[0]));
370
371             for (initialquote = 0; initialquote <= 1; initialquote++) {
372                 for (backslashes = 0; backslashes < 5; backslashes++) {
373                     for (quotes = 0; quotes < 9; quotes++) {
374                         p = teststr + sprintf(teststr, "%s -split ", argv[0]);
375                         if (initialquote) *p++ = '\'';
376                         *p++ = 'a';
377                         for (i = 0; i < backslashes; i++) *p++ = '/';
378                         for (i = 0; i < quotes; i++) *p++ = '\'';
379                         *p++ = 'b';
380                         *p++ = '_';
381                         *p++ = 'c';
382                         *p++ = '\'';
383                         *p++ = '_';
384                         *p++ = 'd';
385                         *p = '\0';
386
387                         system(teststr);
388                     }
389                 }
390             }
391             return 0;
392         }
393
394         fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
395         return 1;
396     }
397
398     /*
399      * If we get here, we were invoked with no arguments, so just
400      * run the tests.
401      */
402
403     for (i = 0; i < lenof(argv_tests); i++) {
404         int ac;
405         char **av;
406
407         split_into_argv(argv_tests[i].cmdline, &ac, &av);
408
409         for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
410             if (strcmp(av[j], argv_tests[i].argv[j])) {
411                 printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
412                        i, argv_tests[i].cmdline,
413                        j, av[j], argv_tests[i].argv[j]);
414             }
415 #ifdef VERBOSE
416             else {
417                 printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
418                        i, argv_tests[i].cmdline,
419                        j, av[j], argv_tests[i].argv[j]);
420             }
421 #endif
422         }
423         if (j < ac)
424             printf("failed test %d (|%s|): %d args returned, should be %d\n",
425                    i, argv_tests[i].cmdline, ac, j);
426         if (argv_tests[i].argv[j])
427             printf("failed test %d (|%s|): %d args returned, should be more\n",
428                    i, argv_tests[i].cmdline, ac);
429     }
430
431     return 0;
432 }
433
434 #endif