comparison code/cut.c__gnu.1992-11-08 @ 14:21ad1c1548c4

Code ausgewaehlter Implementierungen eingefuegt. Das Datum entspricht dem Dateiaenderungsdatum.
author markus schnalke <meillo@marmaro.de>
date Tue, 12 May 2015 06:46:59 +0200
parents
children
comparison
equal deleted inserted replaced
13:bf5e41260f89 14:21ad1c1548c4
1 /* cut - remove parts of lines of files
2 Copyright (C) 1984 by David M. Ihnat
3
4 This program is a total rewrite of the Bell Laboratories Unix(Tm)
5 command of the same name, as of System V. It contains no proprietary
6 code, and therefore may be used without violation of any proprietary
7 agreements whatsoever. However, you will notice that the program is
8 copyrighted by me. This is to assure the program does *not* fall
9 into the public domain. Thus, I may specify just what I am now:
10 This program may be freely copied and distributed, provided this notice
11 remains; it may not be sold for profit without express written consent of
12 the author.
13 Please note that I recreated the behavior of the Unix(Tm) 'cut' command
14 as faithfully as possible; however, I haven't run a full set of regression
15 tests. Thus, the user of this program accepts full responsibility for any
16 effects or loss; in particular, the author is not responsible for any losses,
17 explicit or incidental, that may be incurred through use of this program.
18
19 I ask that any bugs (and, if possible, fixes) be reported to me when
20 possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
21
22 POSIX changes, bug fixes, long-named options, and cleanup
23 by David MacKenzie <djm@ai.mit.edu>.
24
25 Options:
26 --bytes=byte-list
27 -b byte-list Print only the bytes in positions listed
28 in BYTE-LIST.
29 Tabs and backspaces are treated like any
30 other character; they take up 1 byte.
31
32 --characters=character-list
33 -c character-list Print only characters in positions listed
34 in CHARACTER-LIST.
35 The same as -b for now, but
36 internationalization will change that.
37 Tabs and backspaces are treated like any
38 other character; they take up 1 character.
39
40 --fields=field-list
41 -f field-list Print only the fields listed in FIELD-LIST.
42 Fields are separated by a TAB by default.
43
44 --delimiter=delim
45 -d delim For -f, fields are separated by the first
46 character in DELIM instead of TAB.
47
48 -n Do not split multibyte chars (no-op for now).
49
50 --only-delimited
51 -s For -f, do not print lines that do not contain
52 the field separator character.
53
54 The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
55 or ranges separated by commas. The first byte, character, and field
56 are numbered 1.
57
58 A FILE of `-' means standard input. */
59
#define _GNU_SOURCE
#include <ctype.h>
#ifndef isblank
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
#include <limits.h>
#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
69
/* ISDIGIT (C) is nonzero when C is a decimal digit.  Where the
   implementation provides `isascii' as a macro, C is first checked
   with it so that `isdigit' is only consulted for ASCII values.  */
#ifndef isascii
#define ISDIGIT(c) (isdigit (c))
#else
#define ISDIGIT(c) (isascii (c) && isdigit (c))
#endif
75
/* Helpers that are not defined in this file.  `error' is called here
   as error (STATUS, ERRNUM, FORMAT, ...); callers rely on it not
   returning when STATUS is nonzero -- presumably the GNU error
   reporter, TODO confirm against its definition.  */
void error ();
char *xmalloc ();
char *xrealloc ();

/* Option and list handling.  */
void usage ();
void invalid_list ();
int set_fields ();

/* Input processing.  */
int cut_file ();
void cut_stream ();
void cut_bytes ();
void cut_fields ();
void enlarge_line ();
87
/* What can be done about a byte or field.  */
enum field_action
{
  field_omit,
  field_output
};

/* How the input is being cut.  */
enum operating_mode
{
  undefined_mode,

  /* Output characters that are in the given bytes.  */
  byte_mode,

  /* Output the given delimiter-separated fields.  */
  field_mode
};

/* The mode selected on the command line.  */
enum operating_mode operating_mode;

/* Number of elements currently allocated for each of `fields',
   `outbuf' and `inbuf'.  Enlarged as necessary.  */
int line_size;

/* In byte mode, which bytes to output.
   In field mode, which `delim'-separated fields to output.
   Both bytes and fields are numbered starting with 1,
   so the first element of `fields' is unused.  */
enum field_action *fields;

/* Processed output buffer, and where to save the next char to output.  */
char *outbuf;
char *outbufptr;

/* Raw line buffer for field mode, and where to save the next input char.  */
char *inbuf;
char *inbufptr;

/* The delimiter character for field mode.  */
unsigned char delim;

/* If nonzero, for field mode,
   do not output lines containing no delimiter characters.  */
int delimited_lines_only;

/* Nonzero if we have ever read standard input.  */
int have_read_stdin;

/* The name this program was run with.  */
char *program_name;
143
/* Long option table for getopt_long.  Every long option maps onto the
   short option letter given in its last member; the table ends with
   an all-zero entry.  */
struct option longopts[] =
{
  {"bytes", required_argument, 0, 'b'},
  {"characters", required_argument, 0, 'c'},
  {"fields", required_argument, 0, 'f'},
  {"delimiter", required_argument, 0, 'd'},
  {"only-delimited", no_argument, 0, 's'},
  {0, 0, 0, 0}
};
153
154 void
155 main (argc, argv)
156 int argc;
157 char **argv;
158 {
159 int optc, exit_status = 0;
160
161 program_name = argv[0];
162
163 line_size = 512;
164 operating_mode = undefined_mode;
165 delimited_lines_only = 0;
166 delim = '\0';
167 have_read_stdin = 0;
168
169 fields = (enum field_action *)
170 xmalloc (line_size * sizeof (enum field_action));
171 outbuf = (char *) xmalloc (line_size);
172 inbuf = (char *) xmalloc (line_size);
173
174 for (optc = 0; optc < line_size; optc++)
175 fields[optc] = field_omit;
176
177 while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
178 != EOF)
179 {
180 switch (optc)
181 {
182 case 'b':
183 case 'c':
184 /* Build the byte list. */
185 if (operating_mode != undefined_mode)
186 usage ();
187 operating_mode = byte_mode;
188 if (set_fields (optarg) == 0)
189 error (2, 0, "no fields given");
190 break;
191
192 case 'f':
193 /* Build the field list. */
194 if (operating_mode != undefined_mode)
195 usage ();
196 operating_mode = field_mode;
197 if (set_fields (optarg) == 0)
198 error (2, 0, "no fields given");
199 break;
200
201 case 'd':
202 /* New delimiter. */
203 if (optarg[0] == '\0')
204 error (2, 0, "no delimiter given");
205 if (optarg[1] != '\0')
206 error (2, 0, "delimiter must be a single character");
207 delim = optarg[0];
208 break;
209
210 case 'n':
211 break;
212
213 case 's':
214 delimited_lines_only++;
215 break;
216
217 default:
218 usage ();
219 }
220 }
221
222 if (operating_mode == undefined_mode)
223 usage ();
224
225 if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode)
226 usage ();
227
228 if (delim == '\0')
229 delim = '\t';
230
231 if (optind == argc)
232 exit_status |= cut_file ("-");
233 else
234 for (; optind < argc; optind++)
235 exit_status |= cut_file (argv[optind]);
236
237 if (have_read_stdin && fclose (stdin) == EOF)
238 {
239 error (0, errno, "-");
240 exit_status = 1;
241 }
242 if (ferror (stdout) || fclose (stdout) == EOF)
243 error (1, 0, "write error");
244
245 exit (exit_status);
246 }
247
248 /* Select for printing the positions in `fields' that are listed in
249 byte or field specification FIELDSTR. FIELDSTR should be
250 composed of one or more numbers or ranges of numbers, separated by
251 blanks or commas. Incomplete ranges may be given: `-m' means
252 `1-m'; `n-' means `n' through end of line or last field.
253
254 Return the number of fields selected. */
255
256 int
257 set_fields (fieldstr)
258 char *fieldstr;
259 {
260 int initial = 1; /* Value of first number in a range. */
261 int dash_found = 0; /* Nonzero if a '-' is found in this field. */
262 int value = 0; /* If nonzero, a number being accumulated. */
263 int fields_selected = 0; /* Number of fields selected so far. */
264 /* If nonzero, index of first field in a range that goes to end of line. */
265 int eol_range_start = 0;
266
267 for (;;)
268 {
269 if (*fieldstr == '-')
270 {
271 /* Starting a range. */
272 if (dash_found)
273 invalid_list ();
274 dash_found++;
275 fieldstr++;
276
277 if (value)
278 {
279 if (value >= line_size)
280 enlarge_line (value);
281 initial = value;
282 value = 0;
283 }
284 else
285 initial = 1;
286 }
287 else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
288 {
289 /* Ending the string, or this field/byte sublist. */
290 if (dash_found)
291 {
292 dash_found = 0;
293
294 /* A range. Possibilites: -n, m-n, n-.
295 In any case, `initial' contains the start of the range. */
296 if (value == 0)
297 {
298 /* `n-'. From `initial' to end of line. */
299 eol_range_start = initial;
300 fields_selected++;
301 }
302 else
303 {
304 /* `m-n' or `-n' (1-n). */
305 if (value < initial)
306 invalid_list ();
307
308 if (value >= line_size)
309 enlarge_line (value);
310
311 /* Is there already a range going to end of line? */
312 if (eol_range_start != 0)
313 {
314 /* Yes. Is the new sequence already contained
315 in the old one? If so, no processing is
316 necessary. */
317 if (initial < eol_range_start)
318 {
319 /* No, the new sequence starts before the
320 old. Does the old range going to end of line
321 extend into the new range? */
322 if (eol_range_start < value)
323 /* Yes. Simply move the end of line marker. */
324 eol_range_start = initial;
325 else
326 {
327 /* No. A simple range, before and disjoint from
328 the range going to end of line. Fill it. */
329 for (; initial <= value; initial++)
330 fields[initial] = field_output;
331 }
332
333 /* In any case, some fields were selected. */
334 fields_selected++;
335 }
336 }
337 else
338 {
339 /* There is no range going to end of line. */
340 for (; initial <= value; initial++)
341 fields[initial] = field_output;
342 fields_selected++;
343 }
344 value = 0;
345 }
346 }
347 else if (value != 0)
348 {
349 /* A simple field number, not a range. */
350 if (value >= line_size)
351 enlarge_line (value);
352
353 fields[value] = field_output;
354 value = 0;
355 fields_selected++;
356 }
357
358 if (*fieldstr == '\0')
359 {
360 /* If there was a range going to end of line, fill the
361 array from the end of line point. */
362 if (eol_range_start)
363 for (initial = eol_range_start; initial < line_size; initial++)
364 fields[initial] = field_output;
365
366 return fields_selected;
367 }
368
369 fieldstr++;
370 }
371 else if (ISDIGIT (*fieldstr))
372 {
373 value = 10 * value + *fieldstr - '0';
374 fieldstr++;
375 }
376 else
377 invalid_list ();
378 }
379 }
380
381 /* Process file FILE to standard output.
382 Return 0 if successful, 1 if not. */
383
384 int
385 cut_file (file)
386 char *file;
387 {
388 FILE *stream;
389
390 if (!strcmp (file, "-"))
391 {
392 have_read_stdin = 1;
393 stream = stdin;
394 }
395 else
396 {
397 stream = fopen (file, "r");
398 if (stream == NULL)
399 {
400 error (0, errno, "%s", file);
401 return 1;
402 }
403 }
404
405 cut_stream (stream);
406
407 if (ferror (stream))
408 {
409 error (0, errno, "%s", file);
410 return 1;
411 }
412 if (!strcmp (file, "-"))
413 clearerr (stream); /* Also clear EOF. */
414 else if (fclose (stream) == EOF)
415 {
416 error (0, errno, "%s", file);
417 return 1;
418 }
419 return 0;
420 }
421
422 void
423 cut_stream (stream)
424 FILE *stream;
425 {
426 if (operating_mode == byte_mode)
427 cut_bytes (stream);
428 else
429 cut_fields (stream);
430 }
431
432 /* Print the file open for reading on stream STREAM
433 with the bytes marked `field_omit' in `fields' removed from each line. */
434
435 void
436 cut_bytes (stream)
437 FILE *stream;
438 {
439 register int c; /* Each character from the file. */
440 int doneflag = 0; /* Nonzero if EOF reached. */
441 int char_count; /* Number of chars in the line so far. */
442
443 while (doneflag == 0)
444 {
445 /* Start processing a line. */
446 outbufptr = outbuf;
447 char_count = 0;
448
449 do
450 {
451 c = getc (stream);
452 if (c == EOF)
453 {
454 doneflag++;
455 break;
456 }
457
458 /* If this character is to be sent, stow it in the outbuffer. */
459
460 if (++char_count == line_size - 1)
461 enlarge_line (char_count);
462
463 if (fields[char_count] == field_output || c == '\n')
464 *outbufptr++ = c;
465 }
466 while (c != '\n');
467
468 if (char_count)
469 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
470 }
471 }
472
473 /* Print the file open for reading on stream STREAM
474 with the fields marked `field_omit' in `fields' removed from each line.
475 All characters are initially stowed in the raw input buffer, until
476 at least one field has been found. */
477
478 void
479 cut_fields (stream)
480 FILE *stream;
481 {
482 register int c; /* Each character from the file. */
483 int doneflag = 0; /* Nonzero if EOF reached. */
484 int char_count; /* Number of chars in line before any delim. */
485 int fieldfound; /* Nonzero if any fields to print found. */
486 int curr_field; /* Current index in `fields'. */
487
488 while (doneflag == 0)
489 {
490 char_count = 0;
491 fieldfound = 0;
492 curr_field = 1;
493 outbufptr = outbuf;
494 inbufptr = inbuf;
495
496 do
497 {
498 c = getc (stream);
499 if (c == EOF)
500 {
501 doneflag++;
502 break;
503 }
504
505 if (fields[curr_field] == field_output && c != '\n')
506 {
507 /* Working on a field. It, and its terminating
508 delimiter, go only into the processed buffer. */
509 fieldfound = 1;
510 if (outbufptr - outbuf == line_size - 2)
511 enlarge_line (outbufptr - outbuf);
512 *outbufptr++ = c;
513 }
514 else if (fieldfound == 0)
515 {
516 if (++char_count == line_size - 1)
517 enlarge_line (char_count);
518 *inbufptr++ = c;
519 }
520
521 if (c == delim && ++curr_field == line_size - 1)
522 enlarge_line (curr_field);
523 }
524 while (c != '\n');
525
526 if (fieldfound)
527 {
528 /* Something was found. Print it. */
529 if (outbufptr[-1] == delim)
530 --outbufptr; /* Suppress trailing delimiter. */
531
532 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
533 if (c == '\n')
534 putc (c, stdout);
535 }
536 else if (!delimited_lines_only && char_count)
537 /* A line with some characters, no delimiters, and no
538 suppression. Print it. */
539 fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
540 }
541 }
542
543 /* Extend the buffers to accomodate at least NEW_SIZE characters. */
544
545 void
546 enlarge_line (new_size)
547 int new_size;
548 {
549 char *newp;
550 int i;
551
552 new_size += 256; /* Leave some room to grow. */
553
554 fields = (enum field_action *)
555 xrealloc (fields, new_size * sizeof (enum field_action));
556
557 newp = (char *) xrealloc (outbuf, new_size);
558 outbufptr += newp - outbuf;
559 outbuf = newp;
560
561 newp = (char *) xrealloc (inbuf, new_size);
562 inbufptr += newp - inbuf;
563 inbuf = newp;
564
565 for (i = line_size; i < new_size; i++)
566 fields[i] = field_omit;
567 line_size = new_size;
568 }
569
/* Report a malformed byte or field list through error () with
   status 2.  Callers rely on error () not returning in that case.  */

void
invalid_list ()
{
  error (2, 0, "invalid byte or field list");
}
575
576 void
577 usage ()
578 {
579 fprintf (stderr, "\
580 Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\
581 %s {-c character-list,--characters=character-list} [file...]\n\
582 %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\
583 [--delimiter=delim] [--only-delimited] [file...]\n",
584 program_name, program_name, program_name);
585 exit (2);
586 }