Mercurial > docs > cut
comparison code/cut.c__freebsd.2012-11-24 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
13:bf5e41260f89 | 14:21ad1c1548c4 |
---|---|
1 /* | |
2 * Copyright (c) 1989, 1993 | |
3 * The Regents of the University of California. All rights reserved. | |
4 * | |
5 * This code is derived from software contributed to Berkeley by | |
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. | |
7 * | |
8 * Redistribution and use in source and binary forms, with or without | |
9 * modification, are permitted provided that the following conditions | |
10 * are met: | |
11 * 1. Redistributions of source code must retain the above copyright | |
12 * notice, this list of conditions and the following disclaimer. | |
13 * 2. Redistributions in binary form must reproduce the above copyright | |
14 * notice, this list of conditions and the following disclaimer in the | |
15 * documentation and/or other materials provided with the distribution. | |
16 * 4. Neither the name of the University nor the names of its contributors | |
17 * may be used to endorse or promote products derived from this software | |
18 * without specific prior written permission. | |
19 * | |
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 * SUCH DAMAGE. | |
31 */ | |
32 | |
33 #ifndef lint | |
34 static const char copyright[] = | |
35 "@(#) Copyright (c) 1989, 1993\n\ | |
36 The Regents of the University of California. All rights reserved.\n"; | |
37 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; | |
38 #endif /* not lint */ | |
39 #include <sys/cdefs.h> | |
40 __FBSDID("$FreeBSD$"); | |
41 | |
42 #include <ctype.h> | |
43 #include <err.h> | |
44 #include <errno.h> | |
45 #include <limits.h> | |
46 #include <locale.h> | |
47 #include <stdio.h> | |
48 #include <stdlib.h> | |
49 #include <string.h> | |
50 #include <unistd.h> | |
51 #include <wchar.h> | |
52 | |
53 static int bflag; /* set when -b list was given */ | |
54 static int cflag; /* set when -c list was given */ | |
55 static wchar_t dchar; /* field delimiter as a wide character; main() defaults it to tab */ | |
56 static char dcharmb[MB_LEN_MAX + 1]; /* the same delimiter as a multibyte string, written between output fields */ | |
57 static int dflag; /* set when -d delim was given */ | |
58 static int fflag; /* set when -f list was given */ | |
59 static int nflag; /* set when -n was given */ | |
60 static int sflag; /* set when -s was given: f_cut() drops lines without a delimiter */ | |
61 static int wflag; /* set when -w was given: space and tab act as delimiters */ | |
62 | |
63 static size_t autostart, autostop, maxval; /* autostart: largest N of any "-N" list element (0 if none); autostop: smallest start of any open-ended "N-" element (0 if none); maxval: highest listed position, lowered to autostop when that is smaller */ | |
64 static char * positions; /* byte map grown by needpos(); positions[i] is non-zero when 1-based position i is selected */ | |
65 | |
66 static int b_cut(FILE *, const char *); /* the four cutters take the input stream and its name for diagnostics */ | |
67 static int b_n_cut(FILE *, const char *); | |
68 static int c_cut(FILE *, const char *); | |
69 static int f_cut(FILE *, const char *); | |
70 static void get_list(char *); | |
71 static int is_delim(wchar_t); | |
72 static void needpos(size_t); | |
73 static void usage(void); | |
74 | |
75 int | |
76 main(int argc, char *argv[]) | |
77 { | |
78 FILE *fp; | |
79 int (*fcn)(FILE *, const char *); | |
80 int ch, rval; | |
81 size_t n; | |
82 | |
83 setlocale(LC_ALL, ""); | |
84 | |
85 fcn = NULL; | |
86 dchar = '\t'; /* default delimiter is \t */ | |
87 strcpy(dcharmb, "\t"); | |
88 | |
89 while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1) | |
90 switch(ch) { | |
91 case 'b': | |
92 get_list(optarg); | |
93 bflag = 1; | |
94 break; | |
95 case 'c': | |
96 get_list(optarg); | |
97 cflag = 1; | |
98 break; | |
99 case 'd': | |
100 n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL); | |
101 if (dchar == '\0' || n != strlen(optarg)) | |
102 errx(1, "bad delimiter"); | |
103 strcpy(dcharmb, optarg); | |
104 dflag = 1; | |
105 break; | |
106 case 'f': | |
107 get_list(optarg); | |
108 fflag = 1; | |
109 break; | |
110 case 's': | |
111 sflag = 1; | |
112 break; | |
113 case 'n': | |
114 nflag = 1; | |
115 break; | |
116 case 'w': | |
117 wflag = 1; | |
118 break; | |
119 case '?': | |
120 default: | |
121 usage(); | |
122 } | |
123 argc -= optind; | |
124 argv += optind; | |
125 | |
126 if (fflag) { | |
127 if (bflag || cflag || nflag || (wflag && dflag)) | |
128 usage(); | |
129 } else if (!(bflag || cflag) || dflag || sflag || wflag) | |
130 usage(); | |
131 else if (!bflag && nflag) | |
132 usage(); | |
133 | |
134 if (fflag) | |
135 fcn = f_cut; | |
136 else if (cflag) | |
137 fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; | |
138 else if (bflag) | |
139 fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; | |
140 | |
141 rval = 0; | |
142 if (*argv) | |
143 for (; *argv; ++argv) { | |
144 if (strcmp(*argv, "-") == 0) | |
145 rval |= fcn(stdin, "stdin"); | |
146 else { | |
147 if (!(fp = fopen(*argv, "r"))) { | |
148 warn("%s", *argv); | |
149 rval = 1; | |
150 continue; | |
151 } | |
152 fcn(fp, *argv); | |
153 (void)fclose(fp); | |
154 } | |
155 } | |
156 else | |
157 rval = fcn(stdin, "stdin"); | |
158 exit(rval); | |
159 } | |
160 | |
161 static void | |
162 get_list(char *list) | |
163 { | |
164 size_t setautostart, start, stop; | |
165 char *pos; | |
166 char *p; | |
167 | |
168 /* | |
169 * set a byte in the positions array to indicate if a field or | |
170 * column is to be selected; use +1, it's 1-based, not 0-based. | |
171 * Numbers and number ranges may be overlapping, repeated, and in | |
172 * any order. We handle "-3-5" although there's no real reason to. | |
173 */ | |
174 for (; (p = strsep(&list, ", \t")) != NULL;) { | |
175 setautostart = start = stop = 0; | |
176 if (*p == '-') { | |
177 ++p; | |
178 setautostart = 1; | |
179 } | |
180 if (isdigit((unsigned char)*p)) { | |
181 start = stop = strtol(p, &p, 10); | |
182 if (setautostart && start > autostart) | |
183 autostart = start; | |
184 } | |
185 if (*p == '-') { | |
186 if (isdigit((unsigned char)p[1])) | |
187 stop = strtol(p + 1, &p, 10); | |
188 if (*p == '-') { | |
189 ++p; | |
190 if (!autostop || autostop > stop) | |
191 autostop = stop; | |
192 } | |
193 } | |
194 if (*p) | |
195 errx(1, "[-bcf] list: illegal list value"); | |
196 if (!stop || !start) | |
197 errx(1, "[-bcf] list: values may not include zero"); | |
198 if (maxval < stop) { | |
199 maxval = stop; | |
200 needpos(maxval + 1); | |
201 } | |
202 for (pos = positions + start; start++ <= stop; *pos++ = 1); | |
203 } | |
204 | |
205 /* overlapping ranges */ | |
206 if (autostop && maxval > autostop) { | |
207 maxval = autostop; | |
208 needpos(maxval + 1); | |
209 } | |
210 | |
211 /* set autostart */ | |
212 if (autostart) | |
213 memset(positions + 1, '1', autostart); | |
214 } | |
215 | |
216 static void | |
217 needpos(size_t n) | |
218 { | |
219 static size_t npos; | |
220 size_t oldnpos; | |
221 | |
222 /* Grow the positions array to at least the specified size. */ | |
223 if (n > npos) { | |
224 oldnpos = npos; | |
225 if (npos == 0) | |
226 npos = n; | |
227 while (n > npos) | |
228 npos *= 2; | |
229 if ((positions = realloc(positions, npos)) == NULL) | |
230 err(1, "realloc"); | |
231 memset((char *)positions + oldnpos, 0, npos - oldnpos); | |
232 } | |
233 } | |
234 | |
235 static int | |
236 b_cut(FILE *fp, const char *fname __unused) | |
237 { | |
238 int ch, col; | |
239 char *pos; | |
240 | |
241 ch = 0; | |
242 for (;;) { | |
243 pos = positions + 1; | |
244 for (col = maxval; col; --col) { | |
245 if ((ch = getc(fp)) == EOF) | |
246 return (0); | |
247 if (ch == '\n') | |
248 break; | |
249 if (*pos++) | |
250 (void)putchar(ch); | |
251 } | |
252 if (ch != '\n') { | |
253 if (autostop) | |
254 while ((ch = getc(fp)) != EOF && ch != '\n') | |
255 (void)putchar(ch); | |
256 else | |
257 while ((ch = getc(fp)) != EOF && ch != '\n'); | |
258 } | |
259 (void)putchar('\n'); | |
260 } | |
261 return (0); | |
262 } | |
263 | |
264 /* | |
265 * Cut based on byte positions, taking care not to split multibyte characters. | |
266 * Although this function also handles the case where -n is not specified, | |
267 * b_cut() ought to be much faster. | |
 * | |
 * Lines are read with fgetln() and walked one multibyte character at a | |
 * time using mbrlen().  A character is written only as a whole, according | |
 * to the rules given in the two comments inside the loop.  The first | |
 * undecodable sequence in a file produces a single warning naming fname; | |
 * the offending byte is then treated as a one-byte character. | |
 * | |
 * Returns 1 if such a warning was issued, 0 otherwise. | |
268 */ | |
269 static int | |
270 b_n_cut(FILE *fp, const char *fname) | |
271 { | |
272 size_t col, i, lbuflen; | |
273 char *lbuf; | |
274 int canwrite, clen, warned; | |
275 mbstate_t mbs; | |
276 | |
277 memset(&mbs, 0, sizeof(mbs)); | |
278 warned = 0; | |
279 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { | |
280 for (col = 0; lbuflen > 0; col += clen) { /* col is the 0-based byte offset of the current character */ | |
281 if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { /* mbrlen()'s error results become negative once stored in the int clen */ | |
282 if (!warned) { | |
283 warn("%s", fname); | |
284 warned = 1; | |
285 } | |
286 memset(&mbs, 0, sizeof(mbs)); /* restart decoding from the initial conversion state */ | |
287 clen = 1; /* step over the bad byte as if it were a one-byte character */ | |
288 } | |
289 if (clen == 0 || *lbuf == '\n') /* a NUL character or the newline ends processing of this line */ | |
290 break; | |
291 if (col < maxval && !positions[1 + col]) { | |
292 /* | |
293 * Print the character if (1) after an initial | |
294 * segment of un-selected bytes, the rest of | |
295 * it is selected, and (2) the last byte is | |
296 * selected. | |
297 */ | |
298 i = col; | |
299 while (i < col + clen && i < maxval && | |
300 !positions[1 + i]) | |
301 i++; | |
302 canwrite = i < col + clen; /* false when no byte of the character inside maxval was selected */ | |
303 for (; i < col + clen && i < maxval; i++) | |
304 canwrite &= positions[1 + i]; | |
305 if (canwrite) | |
306 fwrite(lbuf, 1, clen, stdout); | |
307 } else { | |
308 /* | |
309 * Print the character if all of it has | |
310 * been selected. | |
311 */ | |
312 canwrite = 1; | |
313 for (i = col; i < col + clen; i++) | |
314 if ((i >= maxval && !autostop) || /* bytes past maxval count as selected only with an open-ended range */ | |
315 (i < maxval && !positions[1 + i])) { | |
316 canwrite = 0; | |
317 break; | |
318 } | |
319 if (canwrite) | |
320 fwrite(lbuf, 1, clen, stdout); | |
321 } | |
322 lbuf += clen; | |
323 lbuflen -= clen; | |
324 } | |
325 if (lbuflen > 0) /* the loop stopped at a newline or NUL rather than running out of input */ | |
326 putchar('\n'); | |
327 } | |
328 return (warned); | |
329 } | |
330 | |
331 static int | |
332 c_cut(FILE *fp, const char *fname) | |
333 { | |
334 wint_t ch; | |
335 int col; | |
336 char *pos; | |
337 | |
338 ch = 0; | |
339 for (;;) { | |
340 pos = positions + 1; | |
341 for (col = maxval; col; --col) { | |
342 if ((ch = getwc(fp)) == WEOF) | |
343 goto out; | |
344 if (ch == '\n') | |
345 break; | |
346 if (*pos++) | |
347 (void)putwchar(ch); | |
348 } | |
349 if (ch != '\n') { | |
350 if (autostop) | |
351 while ((ch = getwc(fp)) != WEOF && ch != '\n') | |
352 (void)putwchar(ch); | |
353 else | |
354 while ((ch = getwc(fp)) != WEOF && ch != '\n'); | |
355 } | |
356 (void)putwchar('\n'); | |
357 } | |
358 out: | |
359 if (ferror(fp)) { | |
360 warn("%s", fname); | |
361 return (1); | |
362 } | |
363 return (0); | |
364 } | |
365 | |
366 static int | |
367 is_delim(wchar_t ch) | |
368 { | |
369 if (wflag) { | |
370 if (ch == ' ' || ch == '\t') | |
371 return 1; | |
372 } else { | |
373 if (ch == dchar) | |
374 return 1; | |
375 } | |
376 return 0; | |
377 } | |
378 | |
379 static int | |
380 f_cut(FILE *fp, const char *fname) | |
381 { | |
382 wchar_t ch; | |
383 int field, i, isdelim; | |
384 char *pos, *p; | |
385 int output; | |
386 char *lbuf, *mlbuf; | |
387 size_t clen, lbuflen, reallen; | |
388 | |
389 mlbuf = NULL; | |
390 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { | |
391 reallen = lbuflen; | |
392 /* Assert EOL has a newline. */ | |
393 if (*(lbuf + lbuflen - 1) != '\n') { | |
394 /* Can't have > 1 line with no trailing newline. */ | |
395 mlbuf = malloc(lbuflen + 1); | |
396 if (mlbuf == NULL) | |
397 err(1, "malloc"); | |
398 memcpy(mlbuf, lbuf, lbuflen); | |
399 *(mlbuf + lbuflen) = '\n'; | |
400 lbuf = mlbuf; | |
401 reallen++; | |
402 } | |
403 output = 0; | |
404 for (isdelim = 0, p = lbuf;; p += clen) { | |
405 clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL); | |
406 if (clen == (size_t)-1 || clen == (size_t)-2) { | |
407 warnc(EILSEQ, "%s", fname); | |
408 free(mlbuf); | |
409 return (1); | |
410 } | |
411 if (clen == 0) | |
412 clen = 1; | |
413 /* this should work if newline is delimiter */ | |
414 if (is_delim(ch)) | |
415 isdelim = 1; | |
416 if (ch == '\n') { | |
417 if (!isdelim && !sflag) | |
418 (void)fwrite(lbuf, lbuflen, 1, stdout); | |
419 break; | |
420 } | |
421 } | |
422 if (!isdelim) | |
423 continue; | |
424 | |
425 pos = positions + 1; | |
426 for (field = maxval, p = lbuf; field; --field, ++pos) { | |
427 if (*pos && output++) | |
428 for (i = 0; dcharmb[i] != '\0'; i++) | |
429 putchar(dcharmb[i]); | |
430 for (;;) { | |
431 clen = mbrtowc(&ch, p, lbuf + reallen - p, | |
432 NULL); | |
433 if (clen == (size_t)-1 || clen == (size_t)-2) { | |
434 warnc(EILSEQ, "%s", fname); | |
435 free(mlbuf); | |
436 return (1); | |
437 } | |
438 if (clen == 0) | |
439 clen = 1; | |
440 p += clen; | |
441 if (ch == '\n' || is_delim(ch)) { | |
442 /* compress whitespace */ | |
443 if (wflag && ch != '\n') | |
444 while (is_delim(*p)) | |
445 p++; | |
446 break; | |
447 } | |
448 if (*pos) | |
449 for (i = 0; i < (int)clen; i++) | |
450 putchar(p[i - clen]); | |
451 } | |
452 if (ch == '\n') | |
453 break; | |
454 } | |
455 if (ch != '\n') { | |
456 if (autostop) { | |
457 if (output) | |
458 for (i = 0; dcharmb[i] != '\0'; i++) | |
459 putchar(dcharmb[i]); | |
460 for (; (ch = *p) != '\n'; ++p) | |
461 (void)putchar(ch); | |
462 } else | |
463 for (; (ch = *p) != '\n'; ++p); | |
464 } | |
465 (void)putchar('\n'); | |
466 } | |
467 free(mlbuf); | |
468 return (0); | |
469 } | |
470 | |
/*
 * Print the three accepted invocation forms to stderr and exit with
 * status 1.  Does not return.
 */
static void
usage(void)
{
	static const char byte_form[] = "usage: cut -b list [-n] [file ...]";
	static const char char_form[] = " cut -c list [file ...]";
	static const char field_form[] =
	    " cut -f list [-s] [-w | -d delim] [file ...]";

	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    byte_form, char_form, field_form);
	exit(1);
}