Mercurial > docs > cut
comparison code/cut.c__gnu.1992-11-08 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
13:bf5e41260f89 | 14:21ad1c1548c4 |
---|---|
1 /* cut - remove parts of lines of files | |
2 Copyright (C) 1984 by David M. Ihnat | |
3 | |
4 This program is a total rewrite of the Bell Laboratories Unix(Tm) | |
5 command of the same name, as of System V. It contains no proprietary | |
6 code, and therefore may be used without violation of any proprietary | |
7 agreements whatsoever. However, you will notice that the program is | |
8 copyrighted by me. This is to assure the program does *not* fall | |
9 into the public domain. Thus, I may specify just what I am now: | |
10 This program may be freely copied and distributed, provided this notice | |
11 remains; it may not be sold for profit without express written consent of | |
12 the author. | |
13 Please note that I recreated the behavior of the Unix(Tm) 'cut' command | |
14 as faithfully as possible; however, I haven't run a full set of regression | |
15 tests. Thus, the user of this program accepts full responsibility for any | |
16 effects or loss; in particular, the author is not responsible for any losses, | |
17 explicit or incidental, that may be incurred through use of this program. | |
18 | |
19 I ask that any bugs (and, if possible, fixes) be reported to me when | |
20 possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us | |
21 | |
22 POSIX changes, bug fixes, long-named options, and cleanup | |
23 by David MacKenzie <djm@ai.mit.edu>. | |
24 | |
25 Options: | |
26 --bytes=byte-list | |
27 -b byte-list Print only the bytes in positions listed | |
28 in BYTE-LIST. | |
29 Tabs and backspaces are treated like any | |
30 other character; they take up 1 byte. | |
31 | |
32 --characters=character-list | |
33 -c character-list Print only characters in positions listed | |
34 in CHARACTER-LIST. | |
35 The same as -b for now, but | |
36 internationalization will change that. | |
37 Tabs and backspaces are treated like any | |
38 other character; they take up 1 character. | |
39 | |
40 --fields=field-list | |
41 -f field-list Print only the fields listed in FIELD-LIST. | |
42 Fields are separated by a TAB by default. | |
43 | |
44 --delimiter=delim | |
45 -d delim For -f, fields are separated by the first | |
46 character in DELIM instead of TAB. | |
47 | |
48 -n Do not split multibyte chars (no-op for now). | |
49 | |
50 --only-delimited | |
51 -s For -f, do not print lines that do not contain | |
52 the field separator character. | |
53 | |
54 The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers | |
55 or ranges separated by commas. The first byte, character, and field | |
56 are numbered 1. | |
57 | |
58 A FILE of `-' means standard input. */ | |
59 | |
60 #define _GNU_SOURCE | |
61 #include <ctype.h> | |
62 #ifndef isblank | |
63 #define isblank(c) ((c) == ' ' || (c) == '\t') | |
64 #endif | |
65 #include <stdio.h> | |
66 #include <getopt.h> | |
67 #include <sys/types.h> | |
68 #include "system.h" | |
69 | |
70 #ifdef isascii | |
71 #define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) | |
72 #else | |
73 #define ISDIGIT(c) (isdigit ((c))) | |
74 #endif | |
75 | |
76 char *xmalloc (); | |
77 char *xrealloc (); | |
78 int set_fields (); | |
79 int cut_file (); | |
80 void cut_stream (); | |
81 void cut_bytes (); | |
82 void cut_fields (); | |
83 void enlarge_line (); | |
84 void error (); | |
85 void invalid_list (); | |
86 void usage (); | |
87 | |
88 /* The number of elements allocated for the input line | |
89 and the byte or field number. | |
90 Enlarged as necessary. */ | |
91 int line_size; | |
92 | |
93 /* Processed output buffer. */ | |
94 char *outbuf; | |
95 | |
96 /* Where to save next char to output. */ | |
97 char *outbufptr; | |
98 | |
99 /* Raw line buffer for field mode. */ | |
100 char *inbuf; | |
101 | |
102 /* Where to save next input char. */ | |
103 char *inbufptr; | |
104 | |
105 /* What can be done about a byte or field. */ | |
106 enum field_action | |
107 { | |
108 field_omit, /* Leave this byte or field out of the output. */ | |
109 field_output /* Copy this byte or field to the output. */ | |
110 }; | |
111 | |
112 /* In byte mode, which bytes to output. | |
113 In field mode, which `delim'-separated fields to output. | |
114 Both bytes and fields are numbered starting with 1, | |
115 so the first element of `fields' is unused. */ | |
116 enum field_action *fields; | |
117 | |
/* Which kind of selection was requested on the command line. */
118 enum operating_mode | |
119 { | |
120 undefined_mode, /* No -b, -c, or -f option has been processed yet. */ | |
121 | |
122 /* Output characters that are in the given bytes. */ | |
123 byte_mode, | |
124 | |
125 /* Output the given delimiter-separated fields. */ | |
126 field_mode | |
127 }; | |
128 | |
129 enum operating_mode operating_mode; | |
130 | |
131 /* If nonzero, | |
132 for field mode, do not output lines containing no delimiter characters. */ | |
133 int delimited_lines_only; | |
134 | |
135 /* The delimiter character for field mode. */ | |
136 unsigned char delim; | |
137 | |
138 /* Nonzero if we have ever read standard input. */ | |
139 int have_read_stdin; | |
140 | |
141 /* The name this program was run with. */ | |
142 char *program_name; | |
143 | |
/* Long option table handed to getopt_long in main.  The last member of
   each entry is the short option letter that main's switch handles, so
   every long option behaves like its one-letter equivalent.  The
   all-zero entry terminates the table. */
144 struct option longopts[] = | |
145 { | |
146 {"bytes", 1, 0, 'b'}, | |
147 {"characters", 1, 0, 'c'}, | |
148 {"fields", 1, 0, 'f'}, | |
149 {"delimiter", 1, 0, 'd'}, | |
150 {"only-delimited", 0, 0, 's'}, | |
151 {0, 0, 0, 0} | |
152 }; | |
153 | |
154 void | |
155 main (argc, argv) | |
156 int argc; | |
157 char **argv; | |
158 { | |
159 int optc, exit_status = 0; | |
160 | |
161 program_name = argv[0]; | |
162 | |
163 line_size = 512; | |
164 operating_mode = undefined_mode; | |
165 delimited_lines_only = 0; | |
166 delim = '\0'; | |
167 have_read_stdin = 0; | |
168 | |
169 fields = (enum field_action *) | |
170 xmalloc (line_size * sizeof (enum field_action)); | |
171 outbuf = (char *) xmalloc (line_size); | |
172 inbuf = (char *) xmalloc (line_size); | |
173 | |
174 for (optc = 0; optc < line_size; optc++) | |
175 fields[optc] = field_omit; | |
176 | |
177 while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0)) | |
178 != EOF) | |
179 { | |
180 switch (optc) | |
181 { | |
182 case 'b': | |
183 case 'c': | |
184 /* Build the byte list. */ | |
185 if (operating_mode != undefined_mode) | |
186 usage (); | |
187 operating_mode = byte_mode; | |
188 if (set_fields (optarg) == 0) | |
189 error (2, 0, "no fields given"); | |
190 break; | |
191 | |
192 case 'f': | |
193 /* Build the field list. */ | |
194 if (operating_mode != undefined_mode) | |
195 usage (); | |
196 operating_mode = field_mode; | |
197 if (set_fields (optarg) == 0) | |
198 error (2, 0, "no fields given"); | |
199 break; | |
200 | |
201 case 'd': | |
202 /* New delimiter. */ | |
203 if (optarg[0] == '\0') | |
204 error (2, 0, "no delimiter given"); | |
205 if (optarg[1] != '\0') | |
206 error (2, 0, "delimiter must be a single character"); | |
207 delim = optarg[0]; | |
208 break; | |
209 | |
210 case 'n': | |
211 break; | |
212 | |
213 case 's': | |
214 delimited_lines_only++; | |
215 break; | |
216 | |
217 default: | |
218 usage (); | |
219 } | |
220 } | |
221 | |
222 if (operating_mode == undefined_mode) | |
223 usage (); | |
224 | |
225 if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode) | |
226 usage (); | |
227 | |
228 if (delim == '\0') | |
229 delim = '\t'; | |
230 | |
231 if (optind == argc) | |
232 exit_status |= cut_file ("-"); | |
233 else | |
234 for (; optind < argc; optind++) | |
235 exit_status |= cut_file (argv[optind]); | |
236 | |
237 if (have_read_stdin && fclose (stdin) == EOF) | |
238 { | |
239 error (0, errno, "-"); | |
240 exit_status = 1; | |
241 } | |
242 if (ferror (stdout) || fclose (stdout) == EOF) | |
243 error (1, 0, "write error"); | |
244 | |
245 exit (exit_status); | |
246 } | |
247 | |
248 /* Select for printing the positions in `fields' that are listed in | |
249 byte or field specification FIELDSTR. FIELDSTR should be | |
250 composed of one or more numbers or ranges of numbers, separated by | |
251 blanks or commas. Incomplete ranges may be given: `-m' means | |
252 `1-m'; `n-' means `n' through end of line or last field. | |
253 | |
254 Return the number of fields selected. */ | |
255 | |
256 int | |
257 set_fields (fieldstr) | |
258 char *fieldstr; | |
259 { | |
260 int initial = 1; /* Value of first number in a range. */ | |
261 int dash_found = 0; /* Nonzero if a '-' is found in this field. */ | |
262 int value = 0; /* If nonzero, a number being accumulated. */ | |
263 int fields_selected = 0; /* Number of fields selected so far. */ | |
264 /* If nonzero, index of first field in a range that goes to end of line. */ | |
265 int eol_range_start = 0; | |
266 | |
267 for (;;) | |
268 { | |
269 if (*fieldstr == '-') | |
270 { | |
271 /* Starting a range. */ | |
272 if (dash_found) | |
273 invalid_list (); | |
274 dash_found++; | |
275 fieldstr++; | |
276 | |
277 if (value) | |
278 { | |
279 if (value >= line_size) | |
280 enlarge_line (value); | |
281 initial = value; | |
282 value = 0; | |
283 } | |
284 else | |
285 initial = 1; | |
286 } | |
287 else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0') | |
288 { | |
289 /* Ending the string, or this field/byte sublist. */ | |
290 if (dash_found) | |
291 { | |
292 dash_found = 0; | |
293 | |
294 /* A range. Possibilites: -n, m-n, n-. | |
295 In any case, `initial' contains the start of the range. */ | |
296 if (value == 0) | |
297 { | |
298 /* `n-'. From `initial' to end of line. */ | |
299 eol_range_start = initial; | |
300 fields_selected++; | |
301 } | |
302 else | |
303 { | |
304 /* `m-n' or `-n' (1-n). */ | |
305 if (value < initial) | |
306 invalid_list (); | |
307 | |
308 if (value >= line_size) | |
309 enlarge_line (value); | |
310 | |
311 /* Is there already a range going to end of line? */ | |
312 if (eol_range_start != 0) | |
313 { | |
314 /* Yes. Is the new sequence already contained | |
315 in the old one? If so, no processing is | |
316 necessary. */ | |
317 if (initial < eol_range_start) | |
318 { | |
319 /* No, the new sequence starts before the | |
320 old. Does the old range going to end of line | |
321 extend into the new range? */ | |
322 if (eol_range_start < value) | |
323 /* Yes. Simply move the end of line marker. */ | |
324 eol_range_start = initial; | |
325 else | |
326 { | |
327 /* No. A simple range, before and disjoint from | |
328 the range going to end of line. Fill it. */ | |
329 for (; initial <= value; initial++) | |
330 fields[initial] = field_output; | |
331 } | |
332 | |
333 /* In any case, some fields were selected. */ | |
334 fields_selected++; | |
335 } | |
336 } | |
337 else | |
338 { | |
339 /* There is no range going to end of line. */ | |
340 for (; initial <= value; initial++) | |
341 fields[initial] = field_output; | |
342 fields_selected++; | |
343 } | |
344 value = 0; | |
345 } | |
346 } | |
347 else if (value != 0) | |
348 { | |
349 /* A simple field number, not a range. */ | |
350 if (value >= line_size) | |
351 enlarge_line (value); | |
352 | |
353 fields[value] = field_output; | |
354 value = 0; | |
355 fields_selected++; | |
356 } | |
357 | |
358 if (*fieldstr == '\0') | |
359 { | |
360 /* If there was a range going to end of line, fill the | |
361 array from the end of line point. */ | |
362 if (eol_range_start) | |
363 for (initial = eol_range_start; initial < line_size; initial++) | |
364 fields[initial] = field_output; | |
365 | |
366 return fields_selected; | |
367 } | |
368 | |
369 fieldstr++; | |
370 } | |
371 else if (ISDIGIT (*fieldstr)) | |
372 { | |
373 value = 10 * value + *fieldstr - '0'; | |
374 fieldstr++; | |
375 } | |
376 else | |
377 invalid_list (); | |
378 } | |
379 } | |
380 | |
381 /* Process file FILE to standard output. | |
382 Return 0 if successful, 1 if not. */ | |
383 | |
384 int | |
385 cut_file (file) | |
386 char *file; | |
387 { | |
388 FILE *stream; | |
389 | |
390 if (!strcmp (file, "-")) | |
391 { | |
392 have_read_stdin = 1; | |
393 stream = stdin; | |
394 } | |
395 else | |
396 { | |
397 stream = fopen (file, "r"); | |
398 if (stream == NULL) | |
399 { | |
400 error (0, errno, "%s", file); | |
401 return 1; | |
402 } | |
403 } | |
404 | |
405 cut_stream (stream); | |
406 | |
407 if (ferror (stream)) | |
408 { | |
409 error (0, errno, "%s", file); | |
410 return 1; | |
411 } | |
412 if (!strcmp (file, "-")) | |
413 clearerr (stream); /* Also clear EOF. */ | |
414 else if (fclose (stream) == EOF) | |
415 { | |
416 error (0, errno, "%s", file); | |
417 return 1; | |
418 } | |
419 return 0; | |
420 } | |
421 | |
422 void | |
423 cut_stream (stream) | |
424 FILE *stream; | |
425 { | |
426 if (operating_mode == byte_mode) | |
427 cut_bytes (stream); | |
428 else | |
429 cut_fields (stream); | |
430 } | |
431 | |
432 /* Print the file open for reading on stream STREAM | |
433 with the bytes marked `field_omit' in `fields' removed from each line. */ | |
434 | |
435 void | |
436 cut_bytes (stream) | |
437 FILE *stream; | |
438 { | |
439 register int c; /* Each character from the file. */ | |
440 int doneflag = 0; /* Nonzero if EOF reached. */ | |
441 int char_count; /* Number of chars in the line so far. */ | |
442 | |
443 while (doneflag == 0) | |
444 { | |
445 /* Start processing a line. */ | |
446 outbufptr = outbuf; | |
447 char_count = 0; | |
448 | |
449 do | |
450 { | |
451 c = getc (stream); | |
452 if (c == EOF) | |
453 { | |
454 doneflag++; | |
455 break; | |
456 } | |
457 | |
458 /* If this character is to be sent, stow it in the outbuffer. */ | |
459 | |
460 if (++char_count == line_size - 1) | |
461 enlarge_line (char_count); | |
462 | |
463 if (fields[char_count] == field_output || c == '\n') | |
464 *outbufptr++ = c; | |
465 } | |
466 while (c != '\n'); | |
467 | |
468 if (char_count) | |
469 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); | |
470 } | |
471 } | |
472 | |
473 /* Print the file open for reading on stream STREAM | |
474 with the fields marked `field_omit' in `fields' removed from each line. | |
475 All characters are initially stowed in the raw input buffer, until | |
476 at least one field has been found. */ | |
477 | |
478 void | |
479 cut_fields (stream) | |
480 FILE *stream; | |
481 { | |
482 register int c; /* Each character from the file. */ | |
483 int doneflag = 0; /* Nonzero if EOF reached. */ | |
484 int char_count; /* Number of chars in line before any delim. */ | |
485 int fieldfound; /* Nonzero if any fields to print found. */ | |
486 int curr_field; /* Current index in `fields'. */ | |
487 | |
488 while (doneflag == 0) | |
489 { | |
490 char_count = 0; | |
491 fieldfound = 0; | |
492 curr_field = 1; | |
493 outbufptr = outbuf; | |
494 inbufptr = inbuf; | |
495 | |
496 do | |
497 { | |
498 c = getc (stream); | |
499 if (c == EOF) | |
500 { | |
501 doneflag++; | |
502 break; | |
503 } | |
504 | |
505 if (fields[curr_field] == field_output && c != '\n') | |
506 { | |
507 /* Working on a field. It, and its terminating | |
508 delimiter, go only into the processed buffer. */ | |
509 fieldfound = 1; | |
510 if (outbufptr - outbuf == line_size - 2) | |
511 enlarge_line (outbufptr - outbuf); | |
512 *outbufptr++ = c; | |
513 } | |
514 else if (fieldfound == 0) | |
515 { | |
516 if (++char_count == line_size - 1) | |
517 enlarge_line (char_count); | |
518 *inbufptr++ = c; | |
519 } | |
520 | |
521 if (c == delim && ++curr_field == line_size - 1) | |
522 enlarge_line (curr_field); | |
523 } | |
524 while (c != '\n'); | |
525 | |
526 if (fieldfound) | |
527 { | |
528 /* Something was found. Print it. */ | |
529 if (outbufptr[-1] == delim) | |
530 --outbufptr; /* Suppress trailing delimiter. */ | |
531 | |
532 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); | |
533 if (c == '\n') | |
534 putc (c, stdout); | |
535 } | |
536 else if (!delimited_lines_only && char_count) | |
537 /* A line with some characters, no delimiters, and no | |
538 suppression. Print it. */ | |
539 fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout); | |
540 } | |
541 } | |
542 | |
543 /* Extend the buffers to accomodate at least NEW_SIZE characters. */ | |
544 | |
545 void | |
546 enlarge_line (new_size) | |
547 int new_size; | |
548 { | |
549 char *newp; | |
550 int i; | |
551 | |
552 new_size += 256; /* Leave some room to grow. */ | |
553 | |
554 fields = (enum field_action *) | |
555 xrealloc (fields, new_size * sizeof (enum field_action)); | |
556 | |
557 newp = (char *) xrealloc (outbuf, new_size); | |
558 outbufptr += newp - outbuf; | |
559 outbuf = newp; | |
560 | |
561 newp = (char *) xrealloc (inbuf, new_size); | |
562 inbufptr += newp - inbuf; | |
563 inbuf = newp; | |
564 | |
565 for (i = line_size; i < new_size; i++) | |
566 fields[i] = field_omit; | |
567 line_size = new_size; | |
568 } | |
569 | |
/* Report a malformed byte or field list by calling `error' with a
   status argument of 2 and no errno value.  */

void
invalid_list ()
{
  error (2, 0, "invalid byte or field list");
}
575 | |
/* Print a synopsis of the three invocation forms (-b, -c, -f) on
   standard error, each prefixed with `program_name', then exit with
   status 2.  Does not return. */
576 void | |
577 usage () | |
578 { | |
579 fprintf (stderr, "\ | |
580 Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\ | |
581 %s {-c character-list,--characters=character-list} [file...]\n\ | |
582 %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\ | |
583 [--delimiter=delim] [--only-delimited] [file...]\n", | |
584 program_name, program_name, program_name); | |
585 exit (2); | |
586 } | |