]> asedeno.scripts.mit.edu Git - PuTTY.git/blob - windows/winutils.c
Various changes related to the Subversion migration.
[PuTTY.git] / windows / winutils.c
1 /*
2  * winutils.c: miscellaneous Windows utilities for GUI apps
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <ctype.h>
8
9 #include "misc.h"
10
11 #ifdef TESTMODE
12 /* Definitions to allow this module to be compiled standalone for testing. */
13 #define smalloc malloc
14 #define srealloc realloc
15 #define sfree free
16 #endif
17
18 /*
19  * Split a complete command line into argc/argv, attempting to do
20  * it exactly the same way Windows itself would do it (so that
21  * console utilities, which receive argc and argv from Windows,
22  * will have their command lines processed in the same way as GUI
23  * utilities which get a whole command line and must break it
24  * themselves).
25  * 
26  * Does not modify the input command line.
27  * 
28  * The final parameter (argstart) is used to return a second array
29  * of char * pointers, the same length as argv, each one pointing
30  * at the start of the corresponding element of argv in the
31  * original command line. So if you get half way through processing
32  * your command line in argc/argv form and then decide you want to
33  * treat the rest as a raw string, you can. If you don't want to,
34  * `argstart' can be safely left NULL.
35  */
36 void split_into_argv(char *cmdline, int *argc, char ***argv,
37                      char ***argstart)
38 {
39     char *p;
40     char *outputline, *q;
41     char **outputargv, **outputargstart;
42     int outputargc;
43
44     /*
45      * At first glance the rules appeared to be:
46      *
47      *  - Single quotes are not special characters.
48      *
49      *  - Double quotes are removed, but within them spaces cease
50      *    to be special.
51      *
52      *  - Backslashes are _only_ special when a sequence of them
53      *    appear just before a double quote. In this situation,
54      *    they are treated like C backslashes: so \" just gives a
55      *    literal quote, \\" gives a literal backslash and then
56      *    opens or closes a double-quoted segment, \\\" gives a
57      *    literal backslash and then a literal quote, \\\\" gives
58      *    two literal backslashes and then opens/closes a
59      *    double-quoted segment, and so forth. Note that this
60      *    behaviour is identical inside and outside double quotes.
61      *
62      *  - Two successive double quotes become one literal double
63      *    quote, but only _inside_ a double-quoted segment.
64      *    Outside, they just form an empty double-quoted segment
65      *    (which may cause an empty argument word).
66      *
67      *  - That only leaves the interesting question of what happens
68      *    when one or more backslashes precedes two or more double
69      *    quotes, starting inside a double-quoted string. And the
70      *    answer to that appears somewhat bizarre. Here I tabulate
71      *    number of backslashes (across the top) against number of
72      *    quotes (down the left), and indicate how many backslashes
73      *    are output, how many quotes are output, and whether a
74      *    quoted segment is open at the end of the sequence:
75      * 
76      *                      backslashes
77      * 
78      *               0         1      2      3      4
79      * 
80      *         0   0,0,y  |  1,0,y  2,0,y  3,0,y  4,0,y
81      *            --------+-----------------------------
82      *         1   0,0,n  |  0,1,y  1,0,n  1,1,y  2,0,n
83      *    q    2   0,1,n  |  0,1,n  1,1,n  1,1,n  2,1,n
84      *    u    3   0,1,y  |  0,2,n  1,1,y  1,2,n  2,1,y
85      *    o    4   0,1,n  |  0,2,y  1,1,n  1,2,y  2,1,n
86      *    t    5   0,2,n  |  0,2,n  1,2,n  1,2,n  2,2,n
87      *    e    6   0,2,y  |  0,3,n  1,2,y  1,3,n  2,2,y
88      *    s    7   0,2,n  |  0,3,y  1,2,n  1,3,y  2,2,n
89      *         8   0,3,n  |  0,3,n  1,3,n  1,3,n  2,3,n
90      *         9   0,3,y  |  0,4,n  1,3,y  1,4,n  2,3,y
91      *        10   0,3,n  |  0,4,y  1,3,n  1,4,y  2,3,n
92      *        11   0,4,n  |  0,4,n  1,4,n  1,4,n  2,4,n
93      * 
94      * 
95      *      [Test fragment was of the form "a\\\"""b c" d.]
96      * 
97      * There is very weird mod-3 behaviour going on here in the
98      * number of quotes, and it even applies when there aren't any
99      * backslashes! How ghastly.
100      * 
101      * With a bit of thought, this extremely odd diagram suddenly
102      * coalesced itself into a coherent, if still ghastly, model of
103      * how things work:
104      * 
105      *  - As before, backslashes are only special when one or more
106      *    of them appear contiguously before at least one double
107      *    quote. In this situation the backslashes do exactly what
108      *    you'd expect: each one quotes the next thing in front of
109      *    it, so you end up with n/2 literal backslashes (if n is
110      *    even) or (n-1)/2 literal backslashes and a literal quote
111      *    (if n is odd). In the latter case the double quote
112      *    character right after the backslashes is used up.
113      * 
114      *  - After that, any remaining double quotes are processed. A
115      *    string of contiguous unescaped double quotes has a mod-3
116      *    behaviour:
117      * 
118      *     * inside a quoted segment, a quote ends the segment.
119      *     * _immediately_ after ending a quoted segment, a quote
120      *       simply produces a literal quote.
121      *     * otherwise, outside a quoted segment, a quote begins a
122      *       quoted segment.
123      * 
124      *    So, for example, if we started inside a quoted segment
125      *    then two contiguous quotes would close the segment and
126      *    produce a literal quote; three would close the segment,
127      *    produce a literal quote, and open a new segment. If we
128      *    started outside a quoted segment, then two contiguous
129      *    quotes would open and then close a segment, producing no
130      *    output (but potentially creating a zero-length argument);
131      *    but three quotes would open and close a segment and then
132      *    produce a literal quote.
133      */
134
135     /*
136      * First deal with the simplest of all special cases: if there
137      * aren't any arguments, return 0,NULL,NULL.
138      */
139     while (*cmdline && isspace(*cmdline)) cmdline++;
140     if (!*cmdline) {
141         if (argc) *argc = 0;
142         if (argv) *argv = NULL;
143         if (argstart) *argstart = NULL;
144         return;
145     }
146
147     /*
148      * This will guaranteeably be big enough; we can realloc it
149      * down later.
150      */
151     outputline = snewn(1+strlen(cmdline), char);
152     outputargv = snewn(strlen(cmdline)+1 / 2, char *);
153     outputargstart = snewn(strlen(cmdline)+1 / 2, char *);
154
155     p = cmdline; q = outputline; outputargc = 0;
156
157     while (*p) {
158         int quote;
159
160         /* Skip whitespace searching for start of argument. */
161         while (*p && isspace(*p)) p++;
162         if (!*p) break;
163
164         /* We have an argument; start it. */
165         outputargv[outputargc] = q;
166         outputargstart[outputargc] = p;
167         outputargc++;
168         quote = 0;
169
170         /* Copy data into the argument until it's finished. */
171         while (*p) {
172             if (!quote && isspace(*p))
173                 break;                 /* argument is finished */
174
175             if (*p == '"' || *p == '\\') {
176                 /*
177                  * We have a sequence of zero or more backslashes
178                  * followed by a sequence of zero or more quotes.
179                  * Count up how many of each, and then deal with
180                  * them as appropriate.
181                  */
182                 int i, slashes = 0, quotes = 0;
183                 while (*p == '\\') slashes++, p++;
184                 while (*p == '"') quotes++, p++;
185
186                 if (!quotes) {
187                     /*
188                      * Special case: if there are no quotes,
189                      * slashes are not special at all, so just copy
190                      * n slashes to the output string.
191                      */
192                     while (slashes--) *q++ = '\\';
193                 } else {
194                     /* Slashes annihilate in pairs. */
195                     while (slashes >= 2) slashes -= 2, *q++ = '\\';
196
197                     /* One remaining slash takes out the first quote. */
198                     if (slashes) quotes--, *q++ = '"';
199
200                     if (quotes > 0) {
201                         /* Outside a quote segment, a quote starts one. */
202                         if (!quote) quotes--, quote = 1;
203
204                         /* Now we produce (n+1)/3 literal quotes... */
205                         for (i = 3; i <= quotes+1; i += 3) *q++ = '"';
206
207                         /* ... and end in a quote segment iff 3 divides n. */
208                         quote = (quotes % 3 == 0);
209                     }
210                 }
211             } else {
212                 *q++ = *p++;
213             }
214         }
215
216         /* At the end of an argument, just append a trailing NUL. */
217         *q++ = '\0';
218     }
219
220     outputargv = sresize(outputargv, outputargc, char *);
221     outputargstart = sresize(outputargstart, outputargc, char *);
222
223     if (argc) *argc = outputargc;
224     if (argv) *argv = outputargv; else sfree(outputargv);
225     if (argstart) *argstart = outputargstart; else sfree(outputargstart);
226 }
227
228 #ifdef TESTMODE
229
/*
 * Table of regression tests for split_into_argv, used by the
 * TESTMODE main() when it is run with no arguments.
 *
 * Each entry pairs an input command line (`cmdline') with the
 * words it is expected to split into (`argv'). The expected word
 * list is terminated by a NULL entry, which is how main() finds
 * its end; the array holds 10 pointers, so an entry has room for
 * at most nine words plus the terminator.
 */
const struct argv_test {
    const char *cmdline;               /* input passed to split_into_argv */
    const char *argv[10];              /* expected words, NULL-terminated */
} argv_tests[] = {
    /*
     * We generate this set of tests by invoking ourself with
     * `-generate'.
     *
     * The rows follow the order of the loops in that mode: first
     * every case without a leading double quote and then every
     * case with one; within each half, 0 to 4 backslashes; and for
     * each of those, 0 to 8 double quotes between the `a' and the
     * `b'.
     */
    {"ab c\" d", {"ab", "c d", NULL}},
    {"a\"b c\" d", {"ab c", "d", NULL}},
    {"a\"\"b c\" d", {"ab", "c d", NULL}},
    {"a\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\"\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"a\\\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"a\\\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"a\\\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"a\\\\b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\"b c\" d", {"a\\b c", "d", NULL}},
    {"a\\\\\"\"b c\" d", {"a\\b", "c d", NULL}},
    {"a\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\b c\" d", {"a\\\\\\b", "c d", NULL}},
    {"a\\\\\\\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"a\\\\\\\\b c\" d", {"a\\\\\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"b c\" d", {"a\\\\b c", "d", NULL}},
    {"a\\\\\\\\\"\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"ab c\" d", {"ab c", "d", NULL}},
    {"\"a\"b c\" d", {"ab", "c d", NULL}},
    {"\"a\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\"\"\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\"\"\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\b c\" d", {"a\\b c", "d", NULL}},
    {"\"a\\\"b c\" d", {"a\"b c", "d", NULL}},
    {"\"a\\\"\"b c\" d", {"a\"b", "c d", NULL}},
    {"\"a\\\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"b c\" d", {"a\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"b c\" d", {"a\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b c", "d", NULL}},
    {"\"a\\\"\"\"\"\"\"\"\"b c\" d", {"a\"\"\"b", "c d", NULL}},
    {"\"a\\\\b c\" d", {"a\\\\b c", "d", NULL}},
    {"\"a\\\\\"b c\" d", {"a\\b", "c d", NULL}},
    {"\"a\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\b c\" d", {"a\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\"b c\" d", {"a\\\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"b c\" d", {"a\\\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"b c\" d", {"a\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"b c\" d", {"a\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b c", "d", NULL}},
    {"\"a\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\"\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\b c\" d", {"a\\\\\\\\b c", "d", NULL}},
    {"\"a\\\\\\\\\"b c\" d", {"a\\\\b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"b c\" d", {"a\\\\\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"b c\" d", {"a\\\\\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b c", "d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"b", "c d", NULL}},
    {"\"a\\\\\\\\\"\"\"\"\"\"\"\"b c\" d", {"a\\\\\"\"\"b", "c d", NULL}},
};
329
330 int main(int argc, char **argv)
331 {
332     int i, j;
333
334     if (argc > 1) {
335         /*
336          * Generation of tests.
337          * 
338          * Given `-splat <args>', we print out a C-style
339          * representation of each argument (in the form "a", "b",
340          * NULL), backslash-escaping each backslash and double
341          * quote.
342          * 
343          * Given `-split <string>', we first doctor `string' by
344          * turning forward slashes into backslashes, single quotes
345          * into double quotes and underscores into spaces; and then
346          * we feed the resulting string to ourself with `-splat'.
347          * 
348          * Given `-generate', we concoct a variety of fun test
349          * cases, encode them in quote-safe form (mapping \, " and
350          * space to /, ' and _ respectively) and feed each one to
351          * `-split'.
352          */
353         if (!strcmp(argv[1], "-splat")) {
354             int i;
355             char *p;
356             for (i = 2; i < argc; i++) {
357                 putchar('"');
358                 for (p = argv[i]; *p; p++) {
359                     if (*p == '\\' || *p == '"')
360                         putchar('\\');
361                     putchar(*p);
362                 }
363                 printf("\", ");
364             }
365             printf("NULL");
366             return 0;
367         }
368
369         if (!strcmp(argv[1], "-split") && argc > 2) {
370             char *str = malloc(20 + strlen(argv[0]) + strlen(argv[2]));
371             char *p, *q;
372
373             q = str + sprintf(str, "%s -splat ", argv[0]);
374             printf("    {\"");
375             for (p = argv[2]; *p; p++, q++) {
376                 switch (*p) {
377                   case '/':  printf("\\\\"); *q = '\\'; break;
378                   case '\'': printf("\\\""); *q = '"';  break;
379                   case '_':  printf(" ");    *q = ' ';  break;
380                   default:   putchar(*p);    *q = *p;   break;
381                 }
382             }
383             *p = '\0';
384             printf("\", {");
385             fflush(stdout);
386
387             system(str);
388
389             printf("}},\n");
390
391             return 0;
392         }
393
394         if (!strcmp(argv[1], "-generate")) {
395             char *teststr, *p;
396             int i, initialquote, backslashes, quotes;
397
398             teststr = malloc(200 + strlen(argv[0]));
399
400             for (initialquote = 0; initialquote <= 1; initialquote++) {
401                 for (backslashes = 0; backslashes < 5; backslashes++) {
402                     for (quotes = 0; quotes < 9; quotes++) {
403                         p = teststr + sprintf(teststr, "%s -split ", argv[0]);
404                         if (initialquote) *p++ = '\'';
405                         *p++ = 'a';
406                         for (i = 0; i < backslashes; i++) *p++ = '/';
407                         for (i = 0; i < quotes; i++) *p++ = '\'';
408                         *p++ = 'b';
409                         *p++ = '_';
410                         *p++ = 'c';
411                         *p++ = '\'';
412                         *p++ = '_';
413                         *p++ = 'd';
414                         *p = '\0';
415
416                         system(teststr);
417                     }
418                 }
419             }
420             return 0;
421         }
422
423         fprintf(stderr, "unrecognised option: \"%s\"\n", argv[1]);
424         return 1;
425     }
426
427     /*
428      * If we get here, we were invoked with no arguments, so just
429      * run the tests.
430      */
431
432     for (i = 0; i < lenof(argv_tests); i++) {
433         int ac;
434         char **av;
435
436         split_into_argv(argv_tests[i].cmdline, &ac, &av);
437
438         for (j = 0; j < ac && argv_tests[i].argv[j]; j++) {
439             if (strcmp(av[j], argv_tests[i].argv[j])) {
440                 printf("failed test %d (|%s|) arg %d: |%s| should be |%s|\n",
441                        i, argv_tests[i].cmdline,
442                        j, av[j], argv_tests[i].argv[j]);
443             }
444 #ifdef VERBOSE
445             else {
446                 printf("test %d (|%s|) arg %d: |%s| == |%s|\n",
447                        i, argv_tests[i].cmdline,
448                        j, av[j], argv_tests[i].argv[j]);
449             }
450 #endif
451         }
452         if (j < ac)
453             printf("failed test %d (|%s|): %d args returned, should be %d\n",
454                    i, argv_tests[i].cmdline, ac, j);
455         if (argv_tests[i].argv[j])
456             printf("failed test %d (|%s|): %d args returned, should be more\n",
457                    i, argv_tests[i].cmdline, ac);
458     }
459
460     return 0;
461 }
462
463 #endif