docs/cut
diff code/cut.c__gnu.2015-05-01 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/code/cut.c__gnu.2015-05-01 Tue May 12 06:46:59 2015 +0200 1.3 @@ -0,0 +1,830 @@ 1.4 +/* cut - remove parts of lines of files 1.5 + Copyright (C) 1997-2015 Free Software Foundation, Inc. 1.6 + Copyright (C) 1984 David M. Ihnat 1.7 + 1.8 + This program is free software: you can redistribute it and/or modify 1.9 + it under the terms of the GNU General Public License as published by 1.10 + the Free Software Foundation, either version 3 of the License, or 1.11 + (at your option) any later version. 1.12 + 1.13 + This program is distributed in the hope that it will be useful, 1.14 + but WITHOUT ANY WARRANTY; without even the implied warranty of 1.15 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1.16 + GNU General Public License for more details. 1.17 + 1.18 + You should have received a copy of the GNU General Public License 1.19 + along with this program. If not, see <http://www.gnu.org/licenses/>. */ 1.20 + 1.21 +/* Written by David Ihnat. */ 1.22 + 1.23 +/* POSIX changes, bug fixes, long-named options, and cleanup 1.24 + by David MacKenzie <djm@gnu.ai.mit.edu>. 1.25 + 1.26 + Rewrite cut_fields and cut_bytes -- Jim Meyering. */ 1.27 + 1.28 +#include <config.h> 1.29 + 1.30 +#include <stdio.h> 1.31 +#include <assert.h> 1.32 +#include <getopt.h> 1.33 +#include <sys/types.h> 1.34 +#include "system.h" 1.35 + 1.36 +#include "error.h" 1.37 +#include "fadvise.h" 1.38 +#include "getndelim2.h" 1.39 +#include "hash.h" 1.40 +#include "quote.h" 1.41 +#include "xstrndup.h" 1.42 + 1.43 +/* The official name of this program (e.g., no 'g' prefix). */ 1.44 +#define PROGRAM_NAME "cut" 1.45 + 1.46 +#define AUTHORS \ 1.47 + proper_name ("David M. Ihnat"), \ 1.48 + proper_name ("David MacKenzie"), \ 1.49 + proper_name ("Jim Meyering") 1.50 + 1.51 +#define FATAL_ERROR(Message) \ 1.52 + do \ 1.53 + { \ 1.54 + error (0, 0, (Message)); \ 1.55 + usage (EXIT_FAILURE); \ 1.56 + } \ 1.57 + while (0) 1.58 + 1.59 + 1.60 +struct range_pair 1.61 + { 1.62 + size_t lo; 1.63 + size_t hi; 1.64 + }; 1.65 + 1.66 +/* Array of `struct range_pair' holding all the finite ranges. */ 1.67 +static struct range_pair *rp; 1.68 + 1.69 +/* Pointer inside RP. When checking if a byte or field is selected 1.70 + by a finite range, we check if it is between CURRENT_RP.LO 1.71 + and CURRENT_RP.HI. If the byte or field index is greater than 1.72 + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ 1.73 +static struct range_pair *current_rp; 1.74 + 1.75 +/* Number of finite ranges specified by the user. */ 1.76 +static size_t n_rp; 1.77 + 1.78 +/* Number of `struct range_pair's allocated. */ 1.79 +static size_t n_rp_allocated; 1.80 + 1.81 + 1.82 +/* Append LOW, HIGH to the list RP of range pairs, allocating additional 1.83 + space if necessary. Update global variable N_RP. When allocating, 1.84 + update global variable N_RP_ALLOCATED. */ 1.85 + 1.86 +static void 1.87 +add_range_pair (size_t lo, size_t hi) 1.88 +{ 1.89 + if (n_rp == n_rp_allocated) 1.90 + rp = X2NREALLOC (rp, &n_rp_allocated); 1.91 + rp[n_rp].lo = lo; 1.92 + rp[n_rp].hi = hi; 1.93 + ++n_rp; 1.94 +} 1.95 + 1.96 +/* This buffer is used to support the semantics of the -s option 1.97 + (or lack of same) when the specified field list includes (does 1.98 + not include) the first field. In both of those cases, the entire 1.99 + first field must be read into this buffer to determine whether it 1.100 + is followed by a delimiter or a newline before any of it may be 1.101 + output. Otherwise, cut_fields can do the job without using this 1.102 + buffer. */ 1.103 +static char *field_1_buffer; 1.104 + 1.105 +/* The number of bytes allocated for FIELD_1_BUFFER. */ 1.106 +static size_t field_1_bufsize; 1.107 + 1.108 +enum operating_mode 1.109 + { 1.110 + undefined_mode, 1.111 + 1.112 + /* Output characters that are in the given bytes. */ 1.113 + byte_mode, 1.114 + 1.115 + /* Output the given delimiter-separated fields. */ 1.116 + field_mode 1.117 + }; 1.118 + 1.119 +static enum operating_mode operating_mode; 1.120 + 1.121 +/* If true do not output lines containing no delimiter characters. 1.122 + Otherwise, all such lines are printed. This option is valid only 1.123 + with field mode. */ 1.124 +static bool suppress_non_delimited; 1.125 + 1.126 +/* If true, print all bytes, characters, or fields _except_ 1.127 + those that were specified. */ 1.128 +static bool complement; 1.129 + 1.130 +/* The delimiter character for field mode. */ 1.131 +static unsigned char delim; 1.132 + 1.133 +/* True if the --output-delimiter=STRING option was specified. */ 1.134 +static bool output_delimiter_specified; 1.135 + 1.136 +/* The length of output_delimiter_string. */ 1.137 +static size_t output_delimiter_length; 1.138 + 1.139 +/* The output field separator string. Defaults to the 1-character 1.140 + string consisting of the input delimiter. */ 1.141 +static char *output_delimiter_string; 1.142 + 1.143 +/* True if we have ever read standard input. */ 1.144 +static bool have_read_stdin; 1.145 + 1.146 +/* For long options that have no equivalent short option, use a 1.147 + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ 1.148 +enum 1.149 +{ 1.150 + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, 1.151 + COMPLEMENT_OPTION 1.152 +}; 1.153 + 1.154 +static struct option const longopts[] = 1.155 +{ 1.156 + {"bytes", required_argument, NULL, 'b'}, 1.157 + {"characters", required_argument, NULL, 'c'}, 1.158 + {"fields", required_argument, NULL, 'f'}, 1.159 + {"delimiter", required_argument, NULL, 'd'}, 1.160 + {"only-delimited", no_argument, NULL, 's'}, 1.161 + {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, 1.162 + {"complement", no_argument, NULL, COMPLEMENT_OPTION}, 1.163 + {GETOPT_HELP_OPTION_DECL}, 1.164 + {GETOPT_VERSION_OPTION_DECL}, 1.165 + {NULL, 0, NULL, 0} 1.166 +}; 1.167 + 1.168 +void 1.169 +usage (int status) 1.170 +{ 1.171 + if (status != EXIT_SUCCESS) 1.172 + emit_try_help (); 1.173 + else 1.174 + { 1.175 + printf (_("\ 1.176 +Usage: %s OPTION... [FILE]...\n\ 1.177 +"), 1.178 + program_name); 1.179 + fputs (_("\ 1.180 +Print selected parts of lines from each FILE to standard output.\n\ 1.181 +"), stdout); 1.182 + 1.183 + emit_stdin_note (); 1.184 + emit_mandatory_arg_note (); 1.185 + 1.186 + fputs (_("\ 1.187 + -b, --bytes=LIST select only these bytes\n\ 1.188 + -c, --characters=LIST select only these characters\n\ 1.189 + -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ 1.190 +"), stdout); 1.191 + fputs (_("\ 1.192 + -f, --fields=LIST select only these fields; also print any line\n\ 1.193 + that contains no delimiter character, unless\n\ 1.194 + the -s option is specified\n\ 1.195 + -n (ignored)\n\ 1.196 +"), stdout); 1.197 + fputs (_("\ 1.198 + --complement complement the set of selected bytes, characters\n\ 1.199 + or fields\n\ 1.200 +"), stdout); 1.201 + fputs (_("\ 1.202 + -s, --only-delimited do not print lines not containing delimiters\n\ 1.203 + --output-delimiter=STRING use STRING as the output delimiter\n\ 1.204 + the default is to use the input delimiter\n\ 1.205 +"), stdout); 1.206 + fputs (HELP_OPTION_DESCRIPTION, stdout); 1.207 + fputs (VERSION_OPTION_DESCRIPTION, stdout); 1.208 + fputs (_("\ 1.209 +\n\ 1.210 +Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ 1.211 +range, or many ranges separated by commas. Selected input is written\n\ 1.212 +in the same order that it is read, and is written exactly once.\n\ 1.213 +"), stdout); 1.214 + fputs (_("\ 1.215 +Each range is one of:\n\ 1.216 +\n\ 1.217 + N N'th byte, character or field, counted from 1\n\ 1.218 + N- from N'th byte, character or field, to end of line\n\ 1.219 + N-M from N'th to M'th (included) byte, character or field\n\ 1.220 + -M from first to M'th (included) byte, character or field\n\ 1.221 +"), stdout); 1.222 + emit_ancillary_info (PROGRAM_NAME); 1.223 + } 1.224 + exit (status); 1.225 +} 1.226 + 1.227 +/* Comparison function for qsort to order the list of 1.228 + struct range_pairs. */ 1.229 +static int 1.230 +compare_ranges (const void *a, const void *b) 1.231 +{ 1.232 + int a_start = ((const struct range_pair *) a)->lo; 1.233 + int b_start = ((const struct range_pair *) b)->lo; 1.234 + return a_start < b_start ? -1 : a_start > b_start; 1.235 +} 1.236 + 1.237 +/* Reallocate Range Pair entries, with corresponding 1.238 + entries outside the range of each specified entry. */ 1.239 + 1.240 +static void 1.241 +complement_rp (void) 1.242 +{ 1.243 + if (complement) 1.244 + { 1.245 + struct range_pair *c = rp; 1.246 + size_t n = n_rp; 1.247 + size_t i; 1.248 + 1.249 + rp = NULL; 1.250 + n_rp = 0; 1.251 + n_rp_allocated = 0; 1.252 + 1.253 + if (c[0].lo > 1) 1.254 + add_range_pair (1, c[0].lo - 1); 1.255 + 1.256 + for (i = 1; i < n; ++i) 1.257 + { 1.258 + if (c[i-1].hi + 1 == c[i].lo) 1.259 + continue; 1.260 + 1.261 + add_range_pair (c[i-1].hi + 1, c[i].lo - 1); 1.262 + } 1.263 + 1.264 + if (c[n-1].hi < SIZE_MAX) 1.265 + add_range_pair (c[n-1].hi + 1, SIZE_MAX); 1.266 + 1.267 + free (c); 1.268 + } 1.269 +} 1.270 + 1.271 +/* Given the list of field or byte range specifications FIELDSTR, 1.272 + allocate and initialize the RP array. FIELDSTR should 1.273 + be composed of one or more numbers or ranges of numbers, separated 1.274 + by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; 1.275 + 'n-' means 'n' through end of line. 1.276 + Return true if FIELDSTR contains at least one field specification, 1.277 + false otherwise. */ 1.278 + 1.279 +static bool 1.280 +set_fields (const char *fieldstr) 1.281 +{ 1.282 + size_t initial = 1; /* Value of first number in a range. */ 1.283 + size_t value = 0; /* If nonzero, a number being accumulated. */ 1.284 + bool lhs_specified = false; 1.285 + bool rhs_specified = false; 1.286 + bool dash_found = false; /* True if a '-' is found in this field. */ 1.287 + bool field_found = false; /* True if at least one field spec 1.288 + has been processed. */ 1.289 + 1.290 + size_t i; 1.291 + bool in_digits = false; 1.292 + 1.293 + /* Collect and store in RP the range end points. */ 1.294 + 1.295 + while (true) 1.296 + { 1.297 + if (*fieldstr == '-') 1.298 + { 1.299 + in_digits = false; 1.300 + /* Starting a range. */ 1.301 + if (dash_found) 1.302 + FATAL_ERROR (_("invalid byte, character or field list")); 1.303 + dash_found = true; 1.304 + fieldstr++; 1.305 + 1.306 + if (lhs_specified && !value) 1.307 + FATAL_ERROR (_("fields and positions are numbered from 1")); 1.308 + 1.309 + initial = (lhs_specified ? value : 1); 1.310 + value = 0; 1.311 + } 1.312 + else if (*fieldstr == ',' 1.313 + || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') 1.314 + { 1.315 + in_digits = false; 1.316 + /* Ending the string, or this field/byte sublist. */ 1.317 + if (dash_found) 1.318 + { 1.319 + dash_found = false; 1.320 + 1.321 + if (!lhs_specified && !rhs_specified) 1.322 + FATAL_ERROR (_("invalid range with no endpoint: -")); 1.323 + 1.324 + /* A range. Possibilities: -n, m-n, n-. 1.325 + In any case, 'initial' contains the start of the range. */ 1.326 + if (!rhs_specified) 1.327 + { 1.328 + /* 'n-'. From 'initial' to end of line. */ 1.329 + add_range_pair (initial, SIZE_MAX); 1.330 + field_found = true; 1.331 + } 1.332 + else 1.333 + { 1.334 + /* 'm-n' or '-n' (1-n). */ 1.335 + if (value < initial) 1.336 + FATAL_ERROR (_("invalid decreasing range")); 1.337 + 1.338 + add_range_pair (initial, value); 1.339 + field_found = true; 1.340 + } 1.341 + value = 0; 1.342 + } 1.343 + else 1.344 + { 1.345 + /* A simple field number, not a range. */ 1.346 + if (value == 0) 1.347 + FATAL_ERROR (_("fields and positions are numbered from 1")); 1.348 + add_range_pair (value, value); 1.349 + value = 0; 1.350 + field_found = true; 1.351 + } 1.352 + 1.353 + if (*fieldstr == '\0') 1.354 + break; 1.355 + 1.356 + fieldstr++; 1.357 + lhs_specified = false; 1.358 + rhs_specified = false; 1.359 + } 1.360 + else if (ISDIGIT (*fieldstr)) 1.361 + { 1.362 + /* Record beginning of digit string, in case we have to 1.363 + complain about it. */ 1.364 + static char const *num_start; 1.365 + if (!in_digits || !num_start) 1.366 + num_start = fieldstr; 1.367 + in_digits = true; 1.368 + 1.369 + if (dash_found) 1.370 + rhs_specified = 1; 1.371 + else 1.372 + lhs_specified = 1; 1.373 + 1.374 + /* Detect overflow. */ 1.375 + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t) 1.376 + || value == SIZE_MAX) 1.377 + { 1.378 + /* In case the user specified -c$(echo 2^64|bc),22, 1.379 + complain only about the first number. */ 1.380 + /* Determine the length of the offending number. */ 1.381 + size_t len = strspn (num_start, "0123456789"); 1.382 + char *bad_num = xstrndup (num_start, len); 1.383 + if (operating_mode == byte_mode) 1.384 + error (0, 0, 1.385 + _("byte offset %s is too large"), quote (bad_num)); 1.386 + else 1.387 + error (0, 0, 1.388 + _("field number %s is too large"), quote (bad_num)); 1.389 + free (bad_num); 1.390 + exit (EXIT_FAILURE); 1.391 + } 1.392 + 1.393 + fieldstr++; 1.394 + } 1.395 + else 1.396 + FATAL_ERROR (_("invalid byte, character or field list")); 1.397 + } 1.398 + 1.399 + qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); 1.400 + 1.401 + /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ 1.402 + for (i = 0; i < n_rp; ++i) 1.403 + { 1.404 + for (size_t j = i + 1; j < n_rp; ++j) 1.405 + { 1.406 + if (rp[j].lo <= rp[i].hi) 1.407 + { 1.408 + rp[i].hi = MAX (rp[j].hi, rp[i].hi); 1.409 + memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp); 1.410 + n_rp--; 1.411 + j--; 1.412 + } 1.413 + else 1.414 + break; 1.415 + } 1.416 + } 1.417 + 1.418 + complement_rp (); 1.419 + 1.420 + /* After merging, reallocate RP so we release memory to the system. 1.421 + Also add a sentinel at the end of RP, to avoid out of bounds access 1.422 + and for performance reasons. */ 1.423 + ++n_rp; 1.424 + rp = xrealloc (rp, n_rp * sizeof (struct range_pair)); 1.425 + rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX; 1.426 + 1.427 + return field_found; 1.428 +} 1.429 + 1.430 +/* Increment *ITEM_IDX (i.e., a field or byte index), 1.431 + and if required CURRENT_RP. */ 1.432 + 1.433 +static inline void 1.434 +next_item (size_t *item_idx) 1.435 +{ 1.436 + (*item_idx)++; 1.437 + if ((*item_idx) > current_rp->hi) 1.438 + current_rp++; 1.439 +} 1.440 + 1.441 +/* Return nonzero if the K'th field or byte is printable. */ 1.442 + 1.443 +static inline bool 1.444 +print_kth (size_t k) 1.445 +{ 1.446 + return current_rp->lo <= k; 1.447 +} 1.448 + 1.449 +/* Return nonzero if K'th byte is the beginning of a range. */ 1.450 + 1.451 +static inline bool 1.452 +is_range_start_index (size_t k) 1.453 +{ 1.454 + return k == current_rp->lo; 1.455 +} 1.456 + 1.457 +/* Read from stream STREAM, printing to standard output any selected bytes. */ 1.458 + 1.459 +static void 1.460 +cut_bytes (FILE *stream) 1.461 +{ 1.462 + size_t byte_idx; /* Number of bytes in the line so far. */ 1.463 + /* Whether to begin printing delimiters between ranges for the current line. 1.464 + Set after we've begun printing data corresponding to the first range. */ 1.465 + bool print_delimiter; 1.466 + 1.467 + byte_idx = 0; 1.468 + print_delimiter = false; 1.469 + current_rp = rp; 1.470 + while (true) 1.471 + { 1.472 + int c; /* Each character from the file. */ 1.473 + 1.474 + c = getc (stream); 1.475 + 1.476 + if (c == '\n') 1.477 + { 1.478 + putchar ('\n'); 1.479 + byte_idx = 0; 1.480 + print_delimiter = false; 1.481 + current_rp = rp; 1.482 + } 1.483 + else if (c == EOF) 1.484 + { 1.485 + if (byte_idx > 0) 1.486 + putchar ('\n'); 1.487 + break; 1.488 + } 1.489 + else 1.490 + { 1.491 + next_item (&byte_idx); 1.492 + if (print_kth (byte_idx)) 1.493 + { 1.494 + if (output_delimiter_specified) 1.495 + { 1.496 + if (print_delimiter && is_range_start_index (byte_idx)) 1.497 + { 1.498 + fwrite (output_delimiter_string, sizeof (char), 1.499 + output_delimiter_length, stdout); 1.500 + } 1.501 + print_delimiter = true; 1.502 + } 1.503 + 1.504 + putchar (c); 1.505 + } 1.506 + } 1.507 + } 1.508 +} 1.509 + 1.510 +/* Read from stream STREAM, printing to standard output any selected fields. */ 1.511 + 1.512 +static void 1.513 +cut_fields (FILE *stream) 1.514 +{ 1.515 + int c; 1.516 + size_t field_idx = 1; 1.517 + bool found_any_selected_field = false; 1.518 + bool buffer_first_field; 1.519 + 1.520 + current_rp = rp; 1.521 + 1.522 + c = getc (stream); 1.523 + if (c == EOF) 1.524 + return; 1.525 + 1.526 + ungetc (c, stream); 1.527 + c = 0; 1.528 + 1.529 + /* To support the semantics of the -s flag, we may have to buffer 1.530 + all of the first field to determine whether it is 'delimited.' 1.531 + But that is unnecessary if all non-delimited lines must be printed 1.532 + and the first field has been selected, or if non-delimited lines 1.533 + must be suppressed and the first field has *not* been selected. 1.534 + That is because a non-delimited line has exactly one field. */ 1.535 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); 1.536 + 1.537 + while (1) 1.538 + { 1.539 + if (field_idx == 1 && buffer_first_field) 1.540 + { 1.541 + ssize_t len; 1.542 + size_t n_bytes; 1.543 + 1.544 + len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, 1.545 + GETNLINE_NO_LIMIT, delim, '\n', stream); 1.546 + if (len < 0) 1.547 + { 1.548 + free (field_1_buffer); 1.549 + field_1_buffer = NULL; 1.550 + if (ferror (stream) || feof (stream)) 1.551 + break; 1.552 + xalloc_die (); 1.553 + } 1.554 + 1.555 + n_bytes = len; 1.556 + assert (n_bytes != 0); 1.557 + 1.558 + c = 0; 1.559 + 1.560 + /* If the first field extends to the end of line (it is not 1.561 + delimited) and we are printing all non-delimited lines, 1.562 + print this one. */ 1.563 + if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) 1.564 + { 1.565 + if (suppress_non_delimited) 1.566 + { 1.567 + /* Empty. */ 1.568 + } 1.569 + else 1.570 + { 1.571 + fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); 1.572 + /* Make sure the output line is newline terminated. */ 1.573 + if (field_1_buffer[n_bytes - 1] != '\n') 1.574 + putchar ('\n'); 1.575 + c = '\n'; 1.576 + } 1.577 + continue; 1.578 + } 1.579 + if (print_kth (1)) 1.580 + { 1.581 + /* Print the field, but not the trailing delimiter. */ 1.582 + fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); 1.583 + 1.584 + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ 1.585 + if (delim == '\n') 1.586 + { 1.587 + int last_c = getc (stream); 1.588 + if (last_c != EOF) 1.589 + { 1.590 + ungetc (last_c, stream); 1.591 + found_any_selected_field = true; 1.592 + } 1.593 + } 1.594 + else 1.595 + found_any_selected_field = true; 1.596 + } 1.597 + next_item (&field_idx); 1.598 + } 1.599 + 1.600 + int prev_c = c; 1.601 + 1.602 + if (print_kth (field_idx)) 1.603 + { 1.604 + if (found_any_selected_field) 1.605 + { 1.606 + fwrite (output_delimiter_string, sizeof (char), 1.607 + output_delimiter_length, stdout); 1.608 + } 1.609 + found_any_selected_field = true; 1.610 + 1.611 + while ((c = getc (stream)) != delim && c != '\n' && c != EOF) 1.612 + { 1.613 + putchar (c); 1.614 + prev_c = c; 1.615 + } 1.616 + } 1.617 + else 1.618 + { 1.619 + while ((c = getc (stream)) != delim && c != '\n' && c != EOF) 1.620 + { 1.621 + prev_c = c; 1.622 + } 1.623 + } 1.624 + 1.625 + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ 1.626 + if (delim == '\n' && c == delim) 1.627 + { 1.628 + int last_c = getc (stream); 1.629 + if (last_c != EOF) 1.630 + ungetc (last_c, stream); 1.631 + else 1.632 + c = last_c; 1.633 + } 1.634 + 1.635 + if (c == delim) 1.636 + next_item (&field_idx); 1.637 + else if (c == '\n' || c == EOF) 1.638 + { 1.639 + if (found_any_selected_field 1.640 + || !(suppress_non_delimited && field_idx == 1)) 1.641 + { 1.642 + if (c == '\n' || prev_c != '\n' || delim == '\n') 1.643 + putchar ('\n'); 1.644 + } 1.645 + if (c == EOF) 1.646 + break; 1.647 + field_idx = 1; 1.648 + current_rp = rp; 1.649 + found_any_selected_field = false; 1.650 + } 1.651 + } 1.652 +} 1.653 + 1.654 +static void 1.655 +cut_stream (FILE *stream) 1.656 +{ 1.657 + if (operating_mode == byte_mode) 1.658 + cut_bytes (stream); 1.659 + else 1.660 + cut_fields (stream); 1.661 +} 1.662 + 1.663 +/* Process file FILE to standard output. 1.664 + Return true if successful. */ 1.665 + 1.666 +static bool 1.667 +cut_file (char const *file) 1.668 +{ 1.669 + FILE *stream; 1.670 + 1.671 + if (STREQ (file, "-")) 1.672 + { 1.673 + have_read_stdin = true; 1.674 + stream = stdin; 1.675 + } 1.676 + else 1.677 + { 1.678 + stream = fopen (file, "r"); 1.679 + if (stream == NULL) 1.680 + { 1.681 + error (0, errno, "%s", file); 1.682 + return false; 1.683 + } 1.684 + } 1.685 + 1.686 + fadvise (stream, FADVISE_SEQUENTIAL); 1.687 + 1.688 + cut_stream (stream); 1.689 + 1.690 + if (ferror (stream)) 1.691 + { 1.692 + error (0, errno, "%s", file); 1.693 + return false; 1.694 + } 1.695 + if (STREQ (file, "-")) 1.696 + clearerr (stream); /* Also clear EOF. */ 1.697 + else if (fclose (stream) == EOF) 1.698 + { 1.699 + error (0, errno, "%s", file); 1.700 + return false; 1.701 + } 1.702 + return true; 1.703 +} 1.704 + 1.705 +int 1.706 +main (int argc, char **argv) 1.707 +{ 1.708 + int optc; 1.709 + bool ok; 1.710 + bool delim_specified = false; 1.711 + char *spec_list_string IF_LINT ( = NULL); 1.712 + 1.713 + initialize_main (&argc, &argv); 1.714 + set_program_name (argv[0]); 1.715 + setlocale (LC_ALL, ""); 1.716 + bindtextdomain (PACKAGE, LOCALEDIR); 1.717 + textdomain (PACKAGE); 1.718 + 1.719 + atexit (close_stdout); 1.720 + 1.721 + operating_mode = undefined_mode; 1.722 + 1.723 + /* By default, all non-delimited lines are printed. */ 1.724 + suppress_non_delimited = false; 1.725 + 1.726 + delim = '\0'; 1.727 + have_read_stdin = false; 1.728 + 1.729 + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) 1.730 + { 1.731 + switch (optc) 1.732 + { 1.733 + case 'b': 1.734 + case 'c': 1.735 + /* Build the byte list. */ 1.736 + if (operating_mode != undefined_mode) 1.737 + FATAL_ERROR (_("only one type of list may be specified")); 1.738 + operating_mode = byte_mode; 1.739 + spec_list_string = optarg; 1.740 + break; 1.741 + 1.742 + case 'f': 1.743 + /* Build the field list. */ 1.744 + if (operating_mode != undefined_mode) 1.745 + FATAL_ERROR (_("only one type of list may be specified")); 1.746 + operating_mode = field_mode; 1.747 + spec_list_string = optarg; 1.748 + break; 1.749 + 1.750 + case 'd': 1.751 + /* New delimiter. */ 1.752 + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ 1.753 + if (optarg[0] != '\0' && optarg[1] != '\0') 1.754 + FATAL_ERROR (_("the delimiter must be a single character")); 1.755 + delim = optarg[0]; 1.756 + delim_specified = true; 1.757 + break; 1.758 + 1.759 + case OUTPUT_DELIMITER_OPTION: 1.760 + output_delimiter_specified = true; 1.761 + /* Interpret --output-delimiter='' to mean 1.762 + 'use the NUL byte as the delimiter.' */ 1.763 + output_delimiter_length = (optarg[0] == '\0' 1.764 + ? 1 : strlen (optarg)); 1.765 + output_delimiter_string = xstrdup (optarg); 1.766 + break; 1.767 + 1.768 + case 'n': 1.769 + break; 1.770 + 1.771 + case 's': 1.772 + suppress_non_delimited = true; 1.773 + break; 1.774 + 1.775 + case COMPLEMENT_OPTION: 1.776 + complement = true; 1.777 + break; 1.778 + 1.779 + case_GETOPT_HELP_CHAR; 1.780 + 1.781 + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); 1.782 + 1.783 + default: 1.784 + usage (EXIT_FAILURE); 1.785 + } 1.786 + } 1.787 + 1.788 + if (operating_mode == undefined_mode) 1.789 + FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); 1.790 + 1.791 + if (delim_specified && operating_mode != field_mode) 1.792 + FATAL_ERROR (_("an input delimiter may be specified only\ 1.793 + when operating on fields")); 1.794 + 1.795 + if (suppress_non_delimited && operating_mode != field_mode) 1.796 + FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ 1.797 +\tonly when operating on fields")); 1.798 + 1.799 + if (! set_fields (spec_list_string)) 1.800 + { 1.801 + if (operating_mode == field_mode) 1.802 + FATAL_ERROR (_("missing list of fields")); 1.803 + else 1.804 + FATAL_ERROR (_("missing list of positions")); 1.805 + } 1.806 + 1.807 + if (!delim_specified) 1.808 + delim = '\t'; 1.809 + 1.810 + if (output_delimiter_string == NULL) 1.811 + { 1.812 + static char dummy[2]; 1.813 + dummy[0] = delim; 1.814 + dummy[1] = '\0'; 1.815 + output_delimiter_string = dummy; 1.816 + output_delimiter_length = 1; 1.817 + } 1.818 + 1.819 + if (optind == argc) 1.820 + ok = cut_file ("-"); 1.821 + else 1.822 + for (ok = true; optind < argc; optind++) 1.823 + ok &= cut_file (argv[optind]); 1.824 + 1.825 + 1.826 + if (have_read_stdin && fclose (stdin) == EOF) 1.827 + { 1.828 + error (0, errno, "-"); 1.829 + ok = false; 1.830 + } 1.831 + 1.832 + return ok ? EXIT_SUCCESS : EXIT_FAILURE; 1.833 +}