docs/cut
diff code/cut.c__gnu.1992-11-08 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/code/cut.c__gnu.1992-11-08 Tue May 12 06:46:59 2015 +0200 1.3 @@ -0,0 +1,586 @@ 1.4 +/* cut - remove parts of lines of files 1.5 + Copyright (C) 1984 by David M. Ihnat 1.6 + 1.7 + This program is a total rewrite of the Bell Laboratories Unix(Tm) 1.8 + command of the same name, as of System V. It contains no proprietary 1.9 + code, and therefore may be used without violation of any proprietary 1.10 + agreements whatsoever. However, you will notice that the program is 1.11 + copyrighted by me. This is to assure the program does *not* fall 1.12 + into the public domain. Thus, I may specify just what I am now: 1.13 + This program may be freely copied and distributed, provided this notice 1.14 + remains; it may not be sold for profit without express written consent of 1.15 + the author. 1.16 + Please note that I recreated the behavior of the Unix(Tm) 'cut' command 1.17 + as faithfully as possible; however, I haven't run a full set of regression 1.18 + tests. Thus, the user of this program accepts full responsibility for any 1.19 + effects or loss; in particular, the author is not responsible for any losses, 1.20 + explicit or incidental, that may be incurred through use of this program. 1.21 + 1.22 + I ask that any bugs (and, if possible, fixes) be reported to me when 1.23 + possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us 1.24 + 1.25 + POSIX changes, bug fixes, long-named options, and cleanup 1.26 + by David MacKenzie <djm@ai.mit.edu>. 1.27 + 1.28 + Options: 1.29 + --bytes=byte-list 1.30 + -b byte-list Print only the bytes in positions listed 1.31 + in BYTE-LIST. 1.32 + Tabs and backspaces are treated like any 1.33 + other character; they take up 1 byte. 1.34 + 1.35 + --characters=character-list 1.36 + -c character-list Print only characters in positions listed 1.37 + in CHARACTER-LIST. 1.38 + The same as -b for now, but 1.39 + internationalization will change that. 1.40 + Tabs and backspaces are treated like any 1.41 + other character; they take up 1 character. 1.42 + 1.43 + --fields=field-list 1.44 + -f field-list Print only the fields listed in FIELD-LIST. 1.45 + Fields are separated by a TAB by default. 1.46 + 1.47 + --delimiter=delim 1.48 + -d delim For -f, fields are separated by the first 1.49 + character in DELIM instead of TAB. 1.50 + 1.51 + -n Do not split multibyte chars (no-op for now). 1.52 + 1.53 + --only-delimited 1.54 + -s For -f, do not print lines that do not contain 1.55 + the field separator character. 1.56 + 1.57 + The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers 1.58 + or ranges separated by commas. The first byte, character, and field 1.59 + are numbered 1. 1.60 + 1.61 + A FILE of `-' means standard input. */ 1.62 + 1.63 +#define _GNU_SOURCE 1.64 +#include <ctype.h> 1.65 +#ifndef isblank 1.66 +#define isblank(c) ((c) == ' ' || (c) == '\t') 1.67 +#endif 1.68 +#include <stdio.h> 1.69 +#include <getopt.h> 1.70 +#include <sys/types.h> 1.71 +#include "system.h" 1.72 + 1.73 +#ifdef isascii 1.74 +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) 1.75 +#else 1.76 +#define ISDIGIT(c) (isdigit ((c))) 1.77 +#endif 1.78 + 1.79 +char *xmalloc (); 1.80 +char *xrealloc (); 1.81 +int set_fields (); 1.82 +int cut_file (); 1.83 +void cut_stream (); 1.84 +void cut_bytes (); 1.85 +void cut_fields (); 1.86 +void enlarge_line (); 1.87 +void error (); 1.88 +void invalid_list (); 1.89 +void usage (); 1.90 + 1.91 +/* The number of elements allocated for the input line 1.92 + and the byte or field number. 1.93 + Enlarged as necessary. */ 1.94 +int line_size; 1.95 + 1.96 +/* Processed output buffer. */ 1.97 +char *outbuf; 1.98 + 1.99 +/* Where to save next char to output. */ 1.100 +char *outbufptr; 1.101 + 1.102 +/* Raw line buffer for field mode. */ 1.103 +char *inbuf; 1.104 + 1.105 +/* Where to save next input char. */ 1.106 +char *inbufptr; 1.107 + 1.108 +/* What can be done about a byte or field. */ 1.109 +enum field_action 1.110 +{ 1.111 + field_omit, 1.112 + field_output 1.113 +}; 1.114 + 1.115 +/* In byte mode, which bytes to output. 1.116 + In field mode, which `delim'-separated fields to output. 1.117 + Both bytes and fields are numbered starting with 1, 1.118 + so the first element of `fields' is unused. */ 1.119 +enum field_action *fields; 1.120 + 1.121 +enum operating_mode 1.122 +{ 1.123 + undefined_mode, 1.124 + 1.125 + /* Output characters that are in the given bytes. */ 1.126 + byte_mode, 1.127 + 1.128 + /* Output the given delimeter-separated fields. */ 1.129 + field_mode 1.130 +}; 1.131 + 1.132 +enum operating_mode operating_mode; 1.133 + 1.134 +/* If nonzero, 1.135 + for field mode, do not output lines containing no delimeter characters. */ 1.136 +int delimited_lines_only; 1.137 + 1.138 +/* The delimeter character for field mode. */ 1.139 +unsigned char delim; 1.140 + 1.141 +/* Nonzero if we have ever read standard input. */ 1.142 +int have_read_stdin; 1.143 + 1.144 +/* The name this program was run with. */ 1.145 +char *program_name; 1.146 + 1.147 +struct option longopts[] = 1.148 +{ 1.149 + {"bytes", 1, 0, 'b'}, 1.150 + {"characters", 1, 0, 'c'}, 1.151 + {"fields", 1, 0, 'f'}, 1.152 + {"delimiter", 1, 0, 'd'}, 1.153 + {"only-delimited", 0, 0, 's'}, 1.154 + {0, 0, 0, 0} 1.155 +}; 1.156 + 1.157 +void 1.158 +main (argc, argv) 1.159 + int argc; 1.160 + char **argv; 1.161 +{ 1.162 + int optc, exit_status = 0; 1.163 + 1.164 + program_name = argv[0]; 1.165 + 1.166 + line_size = 512; 1.167 + operating_mode = undefined_mode; 1.168 + delimited_lines_only = 0; 1.169 + delim = '\0'; 1.170 + have_read_stdin = 0; 1.171 + 1.172 + fields = (enum field_action *) 1.173 + xmalloc (line_size * sizeof (enum field_action)); 1.174 + outbuf = (char *) xmalloc (line_size); 1.175 + inbuf = (char *) xmalloc (line_size); 1.176 + 1.177 + for (optc = 0; optc < line_size; optc++) 1.178 + fields[optc] = field_omit; 1.179 + 1.180 + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0)) 1.181 + != EOF) 1.182 + { 1.183 + switch (optc) 1.184 + { 1.185 + case 'b': 1.186 + case 'c': 1.187 + /* Build the byte list. */ 1.188 + if (operating_mode != undefined_mode) 1.189 + usage (); 1.190 + operating_mode = byte_mode; 1.191 + if (set_fields (optarg) == 0) 1.192 + error (2, 0, "no fields given"); 1.193 + break; 1.194 + 1.195 + case 'f': 1.196 + /* Build the field list. */ 1.197 + if (operating_mode != undefined_mode) 1.198 + usage (); 1.199 + operating_mode = field_mode; 1.200 + if (set_fields (optarg) == 0) 1.201 + error (2, 0, "no fields given"); 1.202 + break; 1.203 + 1.204 + case 'd': 1.205 + /* New delimiter. */ 1.206 + if (optarg[0] == '\0') 1.207 + error (2, 0, "no delimiter given"); 1.208 + if (optarg[1] != '\0') 1.209 + error (2, 0, "delimiter must be a single character"); 1.210 + delim = optarg[0]; 1.211 + break; 1.212 + 1.213 + case 'n': 1.214 + break; 1.215 + 1.216 + case 's': 1.217 + delimited_lines_only++; 1.218 + break; 1.219 + 1.220 + default: 1.221 + usage (); 1.222 + } 1.223 + } 1.224 + 1.225 + if (operating_mode == undefined_mode) 1.226 + usage (); 1.227 + 1.228 + if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode) 1.229 + usage (); 1.230 + 1.231 + if (delim == '\0') 1.232 + delim = '\t'; 1.233 + 1.234 + if (optind == argc) 1.235 + exit_status |= cut_file ("-"); 1.236 + else 1.237 + for (; optind < argc; optind++) 1.238 + exit_status |= cut_file (argv[optind]); 1.239 + 1.240 + if (have_read_stdin && fclose (stdin) == EOF) 1.241 + { 1.242 + error (0, errno, "-"); 1.243 + exit_status = 1; 1.244 + } 1.245 + if (ferror (stdout) || fclose (stdout) == EOF) 1.246 + error (1, 0, "write error"); 1.247 + 1.248 + exit (exit_status); 1.249 +} 1.250 + 1.251 +/* Select for printing the positions in `fields' that are listed in 1.252 + byte or field specification FIELDSTR. FIELDSTR should be 1.253 + composed of one or more numbers or ranges of numbers, separated by 1.254 + blanks or commas. Incomplete ranges may be given: `-m' means 1.255 + `1-m'; `n-' means `n' through end of line or last field. 1.256 + 1.257 + Return the number of fields selected. */ 1.258 + 1.259 +int 1.260 +set_fields (fieldstr) 1.261 + char *fieldstr; 1.262 +{ 1.263 + int initial = 1; /* Value of first number in a range. */ 1.264 + int dash_found = 0; /* Nonzero if a '-' is found in this field. */ 1.265 + int value = 0; /* If nonzero, a number being accumulated. */ 1.266 + int fields_selected = 0; /* Number of fields selected so far. */ 1.267 + /* If nonzero, index of first field in a range that goes to end of line. */ 1.268 + int eol_range_start = 0; 1.269 + 1.270 + for (;;) 1.271 + { 1.272 + if (*fieldstr == '-') 1.273 + { 1.274 + /* Starting a range. */ 1.275 + if (dash_found) 1.276 + invalid_list (); 1.277 + dash_found++; 1.278 + fieldstr++; 1.279 + 1.280 + if (value) 1.281 + { 1.282 + if (value >= line_size) 1.283 + enlarge_line (value); 1.284 + initial = value; 1.285 + value = 0; 1.286 + } 1.287 + else 1.288 + initial = 1; 1.289 + } 1.290 + else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0') 1.291 + { 1.292 + /* Ending the string, or this field/byte sublist. */ 1.293 + if (dash_found) 1.294 + { 1.295 + dash_found = 0; 1.296 + 1.297 + /* A range. Possibilites: -n, m-n, n-. 1.298 + In any case, `initial' contains the start of the range. */ 1.299 + if (value == 0) 1.300 + { 1.301 + /* `n-'. From `initial' to end of line. */ 1.302 + eol_range_start = initial; 1.303 + fields_selected++; 1.304 + } 1.305 + else 1.306 + { 1.307 + /* `m-n' or `-n' (1-n). */ 1.308 + if (value < initial) 1.309 + invalid_list (); 1.310 + 1.311 + if (value >= line_size) 1.312 + enlarge_line (value); 1.313 + 1.314 + /* Is there already a range going to end of line? */ 1.315 + if (eol_range_start != 0) 1.316 + { 1.317 + /* Yes. Is the new sequence already contained 1.318 + in the old one? If so, no processing is 1.319 + necessary. */ 1.320 + if (initial < eol_range_start) 1.321 + { 1.322 + /* No, the new sequence starts before the 1.323 + old. Does the old range going to end of line 1.324 + extend into the new range? */ 1.325 + if (eol_range_start < value) 1.326 + /* Yes. Simply move the end of line marker. */ 1.327 + eol_range_start = initial; 1.328 + else 1.329 + { 1.330 + /* No. A simple range, before and disjoint from 1.331 + the range going to end of line. Fill it. */ 1.332 + for (; initial <= value; initial++) 1.333 + fields[initial] = field_output; 1.334 + } 1.335 + 1.336 + /* In any case, some fields were selected. */ 1.337 + fields_selected++; 1.338 + } 1.339 + } 1.340 + else 1.341 + { 1.342 + /* There is no range going to end of line. */ 1.343 + for (; initial <= value; initial++) 1.344 + fields[initial] = field_output; 1.345 + fields_selected++; 1.346 + } 1.347 + value = 0; 1.348 + } 1.349 + } 1.350 + else if (value != 0) 1.351 + { 1.352 + /* A simple field number, not a range. */ 1.353 + if (value >= line_size) 1.354 + enlarge_line (value); 1.355 + 1.356 + fields[value] = field_output; 1.357 + value = 0; 1.358 + fields_selected++; 1.359 + } 1.360 + 1.361 + if (*fieldstr == '\0') 1.362 + { 1.363 + /* If there was a range going to end of line, fill the 1.364 + array from the end of line point. */ 1.365 + if (eol_range_start) 1.366 + for (initial = eol_range_start; initial < line_size; initial++) 1.367 + fields[initial] = field_output; 1.368 + 1.369 + return fields_selected; 1.370 + } 1.371 + 1.372 + fieldstr++; 1.373 + } 1.374 + else if (ISDIGIT (*fieldstr)) 1.375 + { 1.376 + value = 10 * value + *fieldstr - '0'; 1.377 + fieldstr++; 1.378 + } 1.379 + else 1.380 + invalid_list (); 1.381 + } 1.382 +} 1.383 + 1.384 +/* Process file FILE to standard output. 1.385 + Return 0 if successful, 1 if not. */ 1.386 + 1.387 +int 1.388 +cut_file (file) 1.389 + char *file; 1.390 +{ 1.391 + FILE *stream; 1.392 + 1.393 + if (!strcmp (file, "-")) 1.394 + { 1.395 + have_read_stdin = 1; 1.396 + stream = stdin; 1.397 + } 1.398 + else 1.399 + { 1.400 + stream = fopen (file, "r"); 1.401 + if (stream == NULL) 1.402 + { 1.403 + error (0, errno, "%s", file); 1.404 + return 1; 1.405 + } 1.406 + } 1.407 + 1.408 + cut_stream (stream); 1.409 + 1.410 + if (ferror (stream)) 1.411 + { 1.412 + error (0, errno, "%s", file); 1.413 + return 1; 1.414 + } 1.415 + if (!strcmp (file, "-")) 1.416 + clearerr (stream); /* Also clear EOF. */ 1.417 + else if (fclose (stream) == EOF) 1.418 + { 1.419 + error (0, errno, "%s", file); 1.420 + return 1; 1.421 + } 1.422 + return 0; 1.423 +} 1.424 + 1.425 +void 1.426 +cut_stream (stream) 1.427 + FILE *stream; 1.428 +{ 1.429 + if (operating_mode == byte_mode) 1.430 + cut_bytes (stream); 1.431 + else 1.432 + cut_fields (stream); 1.433 +} 1.434 + 1.435 +/* Print the file open for reading on stream STREAM 1.436 + with the bytes marked `field_omit' in `fields' removed from each line. */ 1.437 + 1.438 +void 1.439 +cut_bytes (stream) 1.440 + FILE *stream; 1.441 +{ 1.442 + register int c; /* Each character from the file. */ 1.443 + int doneflag = 0; /* Nonzero if EOF reached. */ 1.444 + int char_count; /* Number of chars in the line so far. */ 1.445 + 1.446 + while (doneflag == 0) 1.447 + { 1.448 + /* Start processing a line. */ 1.449 + outbufptr = outbuf; 1.450 + char_count = 0; 1.451 + 1.452 + do 1.453 + { 1.454 + c = getc (stream); 1.455 + if (c == EOF) 1.456 + { 1.457 + doneflag++; 1.458 + break; 1.459 + } 1.460 + 1.461 + /* If this character is to be sent, stow it in the outbuffer. */ 1.462 + 1.463 + if (++char_count == line_size - 1) 1.464 + enlarge_line (char_count); 1.465 + 1.466 + if (fields[char_count] == field_output || c == '\n') 1.467 + *outbufptr++ = c; 1.468 + } 1.469 + while (c != '\n'); 1.470 + 1.471 + if (char_count) 1.472 + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); 1.473 + } 1.474 +} 1.475 + 1.476 +/* Print the file open for reading on stream STREAM 1.477 + with the fields marked `field_omit' in `fields' removed from each line. 1.478 + All characters are initially stowed in the raw input buffer, until 1.479 + at least one field has been found. */ 1.480 + 1.481 +void 1.482 +cut_fields (stream) 1.483 + FILE *stream; 1.484 +{ 1.485 + register int c; /* Each character from the file. */ 1.486 + int doneflag = 0; /* Nonzero if EOF reached. */ 1.487 + int char_count; /* Number of chars in line before any delim. */ 1.488 + int fieldfound; /* Nonzero if any fields to print found. */ 1.489 + int curr_field; /* Current index in `fields'. */ 1.490 + 1.491 + while (doneflag == 0) 1.492 + { 1.493 + char_count = 0; 1.494 + fieldfound = 0; 1.495 + curr_field = 1; 1.496 + outbufptr = outbuf; 1.497 + inbufptr = inbuf; 1.498 + 1.499 + do 1.500 + { 1.501 + c = getc (stream); 1.502 + if (c == EOF) 1.503 + { 1.504 + doneflag++; 1.505 + break; 1.506 + } 1.507 + 1.508 + if (fields[curr_field] == field_output && c != '\n') 1.509 + { 1.510 + /* Working on a field. It, and its terminating 1.511 + delimiter, go only into the processed buffer. */ 1.512 + fieldfound = 1; 1.513 + if (outbufptr - outbuf == line_size - 2) 1.514 + enlarge_line (outbufptr - outbuf); 1.515 + *outbufptr++ = c; 1.516 + } 1.517 + else if (fieldfound == 0) 1.518 + { 1.519 + if (++char_count == line_size - 1) 1.520 + enlarge_line (char_count); 1.521 + *inbufptr++ = c; 1.522 + } 1.523 + 1.524 + if (c == delim && ++curr_field == line_size - 1) 1.525 + enlarge_line (curr_field); 1.526 + } 1.527 + while (c != '\n'); 1.528 + 1.529 + if (fieldfound) 1.530 + { 1.531 + /* Something was found. Print it. */ 1.532 + if (outbufptr[-1] == delim) 1.533 + --outbufptr; /* Suppress trailing delimiter. */ 1.534 + 1.535 + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); 1.536 + if (c == '\n') 1.537 + putc (c, stdout); 1.538 + } 1.539 + else if (!delimited_lines_only && char_count) 1.540 + /* A line with some characters, no delimiters, and no 1.541 + suppression. Print it. */ 1.542 + fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout); 1.543 + } 1.544 +} 1.545 + 1.546 +/* Extend the buffers to accomodate at least NEW_SIZE characters. */ 1.547 + 1.548 +void 1.549 +enlarge_line (new_size) 1.550 + int new_size; 1.551 +{ 1.552 + char *newp; 1.553 + int i; 1.554 + 1.555 + new_size += 256; /* Leave some room to grow. */ 1.556 + 1.557 + fields = (enum field_action *) 1.558 + xrealloc (fields, new_size * sizeof (enum field_action)); 1.559 + 1.560 + newp = (char *) xrealloc (outbuf, new_size); 1.561 + outbufptr += newp - outbuf; 1.562 + outbuf = newp; 1.563 + 1.564 + newp = (char *) xrealloc (inbuf, new_size); 1.565 + inbufptr += newp - inbuf; 1.566 + inbuf = newp; 1.567 + 1.568 + for (i = line_size; i < new_size; i++) 1.569 + fields[i] = field_omit; 1.570 + line_size = new_size; 1.571 +} 1.572 + 1.573 +void 1.574 +invalid_list () 1.575 +{ 1.576 + error (2, 0, "invalid byte or field list"); 1.577 +} 1.578 + 1.579 +void 1.580 +usage () 1.581 +{ 1.582 + fprintf (stderr, "\ 1.583 +Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\ 1.584 + %s {-c character-list,--characters=character-list} [file...]\n\ 1.585 + %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\ 1.586 + [--delimiter=delim] [--only-delimited] [file...]\n", 1.587 + program_name, program_name, program_name); 1.588 + exit (2); 1.589 +}