docs/cut
view code/cut.c__gnu.1992-11-08 @ 40:e294684cf338
merge
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 10 Nov 2015 21:09:04 +0100 (2015-11-10) |
parents | |
children |
line source
1 /* cut - remove parts of lines of files
2 Copyright (C) 1984 by David M. Ihnat
4 This program is a total rewrite of the Bell Laboratories Unix(Tm)
5 command of the same name, as of System V. It contains no proprietary
6 code, and therefore may be used without violation of any proprietary
7 agreements whatsoever. However, you will notice that the program is
8 copyrighted by me. This is to assure the program does *not* fall
9 into the public domain. Thus, I may specify just what I am now:
10 This program may be freely copied and distributed, provided this notice
11 remains; it may not be sold for profit without express written consent of
12 the author.
13 Please note that I recreated the behavior of the Unix(Tm) 'cut' command
14 as faithfully as possible; however, I haven't run a full set of regression
15 tests. Thus, the user of this program accepts full responsibility for any
16 effects or loss; in particular, the author is not responsible for any losses,
17 explicit or incidental, that may be incurred through use of this program.
19 I ask that any bugs (and, if possible, fixes) be reported to me when
20 possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
22 POSIX changes, bug fixes, long-named options, and cleanup
23 by David MacKenzie <djm@ai.mit.edu>.
25 Options:
26 --bytes=byte-list
27 -b byte-list Print only the bytes in positions listed
28 in BYTE-LIST.
29 Tabs and backspaces are treated like any
30 other character; they take up 1 byte.
32 --characters=character-list
33 -c character-list Print only characters in positions listed
34 in CHARACTER-LIST.
35 The same as -b for now, but
36 internationalization will change that.
37 Tabs and backspaces are treated like any
38 other character; they take up 1 character.
40 --fields=field-list
41 -f field-list Print only the fields listed in FIELD-LIST.
42 Fields are separated by a TAB by default.
44 --delimiter=delim
45 -d delim For -f, fields are separated by the first
46 character in DELIM instead of TAB.
48 -n Do not split multibyte chars (no-op for now).
50 --only-delimited
51 -s For -f, do not print lines that do not contain
52 the field separator character.
54 The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
55 or ranges separated by commas. The first byte, character, and field
56 are numbered 1.
58 A FILE of `-' means standard input. */
60 #define _GNU_SOURCE
61 #include <ctype.h>
62 #ifndef isblank
63 #define isblank(c) ((c) == ' ' || (c) == '\t')
64 #endif
65 #include <stdio.h>
66 #include <getopt.h>
67 #include <sys/types.h>
68 #include "system.h"
70 #ifdef isascii
71 #define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
72 #else
73 #define ISDIGIT(c) (isdigit ((c)))
74 #endif
76 char *xmalloc ();
77 char *xrealloc ();
78 int set_fields ();
79 int cut_file ();
80 void cut_stream ();
81 void cut_bytes ();
82 void cut_fields ();
83 void enlarge_line ();
84 void error ();
85 void invalid_list ();
86 void usage ();
88 /* The number of elements allocated for the input line
89 and the byte or field number.
90 Enlarged as necessary. */
91 int line_size;
93 /* Processed output buffer. */
94 char *outbuf;
96 /* Where to save next char to output. */
97 char *outbufptr;
99 /* Raw line buffer for field mode. */
100 char *inbuf;
102 /* Where to save next input char. */
103 char *inbufptr;
105 /* What can be done about a byte or field. */
106 enum field_action
107 {
108 field_omit,
109 field_output
110 };
112 /* In byte mode, which bytes to output.
113 In field mode, which `delim'-separated fields to output.
114 Both bytes and fields are numbered starting with 1,
115 so the first element of `fields' is unused. */
116 enum field_action *fields;
118 enum operating_mode
119 {
120 undefined_mode,
122 /* Output characters that are in the given bytes. */
123 byte_mode,
125 /* Output the given delimiter-separated fields. */
126 field_mode
127 };
129 enum operating_mode operating_mode;
131 /* If nonzero, then in field mode do not output
132 lines that contain no delimiter characters. */
133 int delimited_lines_only;
135 /* The delimiter character for field mode. */
136 unsigned char delim;
138 /* Nonzero if we have ever read standard input. */
139 int have_read_stdin;
141 /* The name this program was run with. */
142 char *program_name;
144 struct option longopts[] =
145 {
146 {"bytes", 1, 0, 'b'},
147 {"characters", 1, 0, 'c'},
148 {"fields", 1, 0, 'f'},
149 {"delimiter", 1, 0, 'd'},
150 {"only-delimited", 0, 0, 's'},
151 {0, 0, 0, 0}
152 };
154 void
155 main (argc, argv)
156 int argc;
157 char **argv;
158 {
159 int optc, exit_status = 0;
161 program_name = argv[0];
163 line_size = 512;
164 operating_mode = undefined_mode;
165 delimited_lines_only = 0;
166 delim = '\0';
167 have_read_stdin = 0;
169 fields = (enum field_action *)
170 xmalloc (line_size * sizeof (enum field_action));
171 outbuf = (char *) xmalloc (line_size);
172 inbuf = (char *) xmalloc (line_size);
174 for (optc = 0; optc < line_size; optc++)
175 fields[optc] = field_omit;
177 while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
178 != EOF)
179 {
180 switch (optc)
181 {
182 case 'b':
183 case 'c':
184 /* Build the byte list. */
185 if (operating_mode != undefined_mode)
186 usage ();
187 operating_mode = byte_mode;
188 if (set_fields (optarg) == 0)
189 error (2, 0, "no fields given");
190 break;
192 case 'f':
193 /* Build the field list. */
194 if (operating_mode != undefined_mode)
195 usage ();
196 operating_mode = field_mode;
197 if (set_fields (optarg) == 0)
198 error (2, 0, "no fields given");
199 break;
201 case 'd':
202 /* New delimiter. */
203 if (optarg[0] == '\0')
204 error (2, 0, "no delimiter given");
205 if (optarg[1] != '\0')
206 error (2, 0, "delimiter must be a single character");
207 delim = optarg[0];
208 break;
210 case 'n':
211 break;
213 case 's':
214 delimited_lines_only++;
215 break;
217 default:
218 usage ();
219 }
220 }
222 if (operating_mode == undefined_mode)
223 usage ();
225 if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode)
226 usage ();
228 if (delim == '\0')
229 delim = '\t';
231 if (optind == argc)
232 exit_status |= cut_file ("-");
233 else
234 for (; optind < argc; optind++)
235 exit_status |= cut_file (argv[optind]);
237 if (have_read_stdin && fclose (stdin) == EOF)
238 {
239 error (0, errno, "-");
240 exit_status = 1;
241 }
242 if (ferror (stdout) || fclose (stdout) == EOF)
243 error (1, 0, "write error");
245 exit (exit_status);
246 }
248 /* Select for printing the positions in `fields' that are listed in
249 byte or field specification FIELDSTR. FIELDSTR should be
250 composed of one or more numbers or ranges of numbers, separated by
251 blanks or commas. Incomplete ranges may be given: `-m' means
252 `1-m'; `n-' means `n' through end of line or last field.
254 Return the number of fields selected. */
256 int
257 set_fields (fieldstr)
258 char *fieldstr;
259 {
260 int initial = 1; /* Value of first number in a range. */
261 int dash_found = 0; /* Nonzero if a '-' is found in this field. */
262 int value = 0; /* If nonzero, a number being accumulated. */
263 int fields_selected = 0; /* Number of fields selected so far. */
264 /* If nonzero, index of first field in a range that goes to end of line. */
265 int eol_range_start = 0;
267 for (;;)
268 {
269 if (*fieldstr == '-')
270 {
271 /* Starting a range. */
272 if (dash_found)
273 invalid_list ();
274 dash_found++;
275 fieldstr++;
277 if (value)
278 {
279 if (value >= line_size)
280 enlarge_line (value);
281 initial = value;
282 value = 0;
283 }
284 else
285 initial = 1;
286 }
287 else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
288 {
289 /* Ending the string, or this field/byte sublist. */
290 if (dash_found)
291 {
292 dash_found = 0;
294 /* A range. Possibilites: -n, m-n, n-.
295 In any case, `initial' contains the start of the range. */
296 if (value == 0)
297 {
298 /* `n-'. From `initial' to end of line. */
299 eol_range_start = initial;
300 fields_selected++;
301 }
302 else
303 {
304 /* `m-n' or `-n' (1-n). */
305 if (value < initial)
306 invalid_list ();
308 if (value >= line_size)
309 enlarge_line (value);
311 /* Is there already a range going to end of line? */
312 if (eol_range_start != 0)
313 {
314 /* Yes. Is the new sequence already contained
315 in the old one? If so, no processing is
316 necessary. */
317 if (initial < eol_range_start)
318 {
319 /* No, the new sequence starts before the
320 old. Does the old range going to end of line
321 extend into the new range? */
322 if (eol_range_start < value)
323 /* Yes. Simply move the end of line marker. */
324 eol_range_start = initial;
325 else
326 {
327 /* No. A simple range, before and disjoint from
328 the range going to end of line. Fill it. */
329 for (; initial <= value; initial++)
330 fields[initial] = field_output;
331 }
333 /* In any case, some fields were selected. */
334 fields_selected++;
335 }
336 }
337 else
338 {
339 /* There is no range going to end of line. */
340 for (; initial <= value; initial++)
341 fields[initial] = field_output;
342 fields_selected++;
343 }
344 value = 0;
345 }
346 }
347 else if (value != 0)
348 {
349 /* A simple field number, not a range. */
350 if (value >= line_size)
351 enlarge_line (value);
353 fields[value] = field_output;
354 value = 0;
355 fields_selected++;
356 }
358 if (*fieldstr == '\0')
359 {
360 /* If there was a range going to end of line, fill the
361 array from the end of line point. */
362 if (eol_range_start)
363 for (initial = eol_range_start; initial < line_size; initial++)
364 fields[initial] = field_output;
366 return fields_selected;
367 }
369 fieldstr++;
370 }
371 else if (ISDIGIT (*fieldstr))
372 {
373 value = 10 * value + *fieldstr - '0';
374 fieldstr++;
375 }
376 else
377 invalid_list ();
378 }
379 }
381 /* Process file FILE to standard output.
382 Return 0 if successful, 1 if not. */
384 int
385 cut_file (file)
386 char *file;
387 {
388 FILE *stream;
390 if (!strcmp (file, "-"))
391 {
392 have_read_stdin = 1;
393 stream = stdin;
394 }
395 else
396 {
397 stream = fopen (file, "r");
398 if (stream == NULL)
399 {
400 error (0, errno, "%s", file);
401 return 1;
402 }
403 }
405 cut_stream (stream);
407 if (ferror (stream))
408 {
409 error (0, errno, "%s", file);
410 return 1;
411 }
412 if (!strcmp (file, "-"))
413 clearerr (stream); /* Also clear EOF. */
414 else if (fclose (stream) == EOF)
415 {
416 error (0, errno, "%s", file);
417 return 1;
418 }
419 return 0;
420 }
422 void
423 cut_stream (stream)
424 FILE *stream;
425 {
426 if (operating_mode == byte_mode)
427 cut_bytes (stream);
428 else
429 cut_fields (stream);
430 }
432 /* Print the file open for reading on stream STREAM
433 with the bytes marked `field_omit' in `fields' removed from each line. */
435 void
436 cut_bytes (stream)
437 FILE *stream;
438 {
439 register int c; /* Each character from the file. */
440 int doneflag = 0; /* Nonzero if EOF reached. */
441 int char_count; /* Number of chars in the line so far. */
443 while (doneflag == 0)
444 {
445 /* Start processing a line. */
446 outbufptr = outbuf;
447 char_count = 0;
449 do
450 {
451 c = getc (stream);
452 if (c == EOF)
453 {
454 doneflag++;
455 break;
456 }
458 /* If this character is to be sent, stow it in the outbuffer. */
460 if (++char_count == line_size - 1)
461 enlarge_line (char_count);
463 if (fields[char_count] == field_output || c == '\n')
464 *outbufptr++ = c;
465 }
466 while (c != '\n');
468 if (char_count)
469 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
470 }
471 }
473 /* Print the file open for reading on stream STREAM
474 with the fields marked `field_omit' in `fields' removed from each line.
475 All characters are initially stowed in the raw input buffer, until
476 at least one field has been found. */
478 void
479 cut_fields (stream)
480 FILE *stream;
481 {
482 register int c; /* Each character from the file. */
483 int doneflag = 0; /* Nonzero if EOF reached. */
484 int char_count; /* Number of chars in line before any delim. */
485 int fieldfound; /* Nonzero if any fields to print found. */
486 int curr_field; /* Current index in `fields'. */
488 while (doneflag == 0)
489 {
490 char_count = 0;
491 fieldfound = 0;
492 curr_field = 1;
493 outbufptr = outbuf;
494 inbufptr = inbuf;
496 do
497 {
498 c = getc (stream);
499 if (c == EOF)
500 {
501 doneflag++;
502 break;
503 }
505 if (fields[curr_field] == field_output && c != '\n')
506 {
507 /* Working on a field. It, and its terminating
508 delimiter, go only into the processed buffer. */
509 fieldfound = 1;
510 if (outbufptr - outbuf == line_size - 2)
511 enlarge_line (outbufptr - outbuf);
512 *outbufptr++ = c;
513 }
514 else if (fieldfound == 0)
515 {
516 if (++char_count == line_size - 1)
517 enlarge_line (char_count);
518 *inbufptr++ = c;
519 }
521 if (c == delim && ++curr_field == line_size - 1)
522 enlarge_line (curr_field);
523 }
524 while (c != '\n');
526 if (fieldfound)
527 {
528 /* Something was found. Print it. */
529 if (outbufptr[-1] == delim)
530 --outbufptr; /* Suppress trailing delimiter. */
532 fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
533 if (c == '\n')
534 putc (c, stdout);
535 }
536 else if (!delimited_lines_only && char_count)
537 /* A line with some characters, no delimiters, and no
538 suppression. Print it. */
539 fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
540 }
541 }
543 /* Extend the buffers to accomodate at least NEW_SIZE characters. */
545 void
546 enlarge_line (new_size)
547 int new_size;
548 {
549 char *newp;
550 int i;
552 new_size += 256; /* Leave some room to grow. */
554 fields = (enum field_action *)
555 xrealloc (fields, new_size * sizeof (enum field_action));
557 newp = (char *) xrealloc (outbuf, new_size);
558 outbufptr += newp - outbuf;
559 outbuf = newp;
561 newp = (char *) xrealloc (inbuf, new_size);
562 inbufptr += newp - inbuf;
563 inbuf = newp;
565 for (i = line_size; i < new_size; i++)
566 fields[i] = field_omit;
567 line_size = new_size;
568 }
/* Complain that the byte or field list on the command line is
   malformed, by handing the message to error () with a status
   argument of 2. */

void
invalid_list ()
{
  error (2, 0, "invalid byte or field list");
}
576 void
577 usage ()
578 {
579 fprintf (stderr, "\
580 Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\
581 %s {-c character-list,--characters=character-list} [file...]\n\
582 %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\
583 [--delimiter=delim] [--only-delimited] [file...]\n",
584 program_name, program_name, program_name);
585 exit (2);
586 }