docs/cut

annotate code/cut.c__gnu.2015-05-01 @ 21:bac481be86d7

Umlaute konvertiert
author markus schnalke <meillo@marmaro.de>
date Thu, 28 May 2015 06:41:08 +0200
parents
children
rev   line source
meillo@14 1 /* cut - remove parts of lines of files
meillo@14 2 Copyright (C) 1997-2015 Free Software Foundation, Inc.
meillo@14 3 Copyright (C) 1984 David M. Ihnat
meillo@14 4
meillo@14 5 This program is free software: you can redistribute it and/or modify
meillo@14 6 it under the terms of the GNU General Public License as published by
meillo@14 7 the Free Software Foundation, either version 3 of the License, or
meillo@14 8 (at your option) any later version.
meillo@14 9
meillo@14 10 This program is distributed in the hope that it will be useful,
meillo@14 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
meillo@14 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
meillo@14 13 GNU General Public License for more details.
meillo@14 14
meillo@14 15 You should have received a copy of the GNU General Public License
meillo@14 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
meillo@14 17
meillo@14 18 /* Written by David Ihnat. */
meillo@14 19
meillo@14 20 /* POSIX changes, bug fixes, long-named options, and cleanup
meillo@14 21 by David MacKenzie <djm@gnu.ai.mit.edu>.
meillo@14 22
meillo@14 23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
meillo@14 24
meillo@14 25 #include <config.h>
meillo@14 26
meillo@14 27 #include <stdio.h>
meillo@14 28 #include <assert.h>
meillo@14 29 #include <getopt.h>
meillo@14 30 #include <sys/types.h>
meillo@14 31 #include "system.h"
meillo@14 32
meillo@14 33 #include "error.h"
meillo@14 34 #include "fadvise.h"
meillo@14 35 #include "getndelim2.h"
meillo@14 36 #include "hash.h"
meillo@14 37 #include "quote.h"
meillo@14 38 #include "xstrndup.h"
meillo@14 39
meillo@14 40 /* The official name of this program (e.g., no 'g' prefix). */
meillo@14 41 #define PROGRAM_NAME "cut"
meillo@14 42
meillo@14 43 #define AUTHORS \
meillo@14 44 proper_name ("David M. Ihnat"), \
meillo@14 45 proper_name ("David MacKenzie"), \
meillo@14 46 proper_name ("Jim Meyering")
meillo@14 47
/* Report MESSAGE as a diagnostic with error (), then call
   usage (EXIT_FAILURE), which ends the program.

   MESSAGE is handed to error () as the argument of a "%s" format
   instead of being used as the format itself, so that a '%' appearing
   in a (possibly translated) message is printed literally rather than
   being interpreted as a conversion specification.  */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, "%s", (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
meillo@14 55
meillo@14 56
/* One selected range of byte or field positions, described by its
   two end points.  Positions are counted from 1.  */
struct range_pair
  {
    /* First position covered by the range.  */
    size_t lo;

    /* Last position covered by the range (inclusive).  SIZE_MAX is
       stored here for ranges that extend to the end of the line.  */
    size_t hi;
  };
meillo@14 62
meillo@14 63 /* Array of `struct range_pair' holding all the finite ranges. */
meillo@14 64 static struct range_pair *rp;
meillo@14 65
meillo@14 66 /* Pointer inside RP. When checking if a byte or field is selected
meillo@14 67 by a finite range, we check if it is between CURRENT_RP.LO
meillo@14 68 and CURRENT_RP.HI. If the byte or field index is greater than
meillo@14 69 CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
meillo@14 70 static struct range_pair *current_rp;
meillo@14 71
meillo@14 72 /* Number of finite ranges specified by the user. */
meillo@14 73 static size_t n_rp;
meillo@14 74
meillo@14 75 /* Number of `struct range_pair's allocated. */
meillo@14 76 static size_t n_rp_allocated;
meillo@14 77
meillo@14 78
meillo@14 79 /* Append LOW, HIGH to the list RP of range pairs, allocating additional
meillo@14 80 space if necessary. Update global variable N_RP. When allocating,
meillo@14 81 update global variable N_RP_ALLOCATED. */
meillo@14 82
meillo@14 83 static void
meillo@14 84 add_range_pair (size_t lo, size_t hi)
meillo@14 85 {
meillo@14 86 if (n_rp == n_rp_allocated)
meillo@14 87 rp = X2NREALLOC (rp, &n_rp_allocated);
meillo@14 88 rp[n_rp].lo = lo;
meillo@14 89 rp[n_rp].hi = hi;
meillo@14 90 ++n_rp;
meillo@14 91 }
meillo@14 92
meillo@14 93 /* This buffer is used to support the semantics of the -s option
meillo@14 94 (or lack of same) when the specified field list includes (does
meillo@14 95 not include) the first field. In both of those cases, the entire
meillo@14 96 first field must be read into this buffer to determine whether it
meillo@14 97 is followed by a delimiter or a newline before any of it may be
meillo@14 98 output. Otherwise, cut_fields can do the job without using this
meillo@14 99 buffer. */
meillo@14 100 static char *field_1_buffer;
meillo@14 101
meillo@14 102 /* The number of bytes allocated for FIELD_1_BUFFER. */
meillo@14 103 static size_t field_1_bufsize;
meillo@14 104
/* The kind of list the program has been asked to work on.  */
enum operating_mode
  {
    /* No list option has been processed yet.  */
    undefined_mode = 0,

    /* Output characters that are in the given bytes.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  */
    field_mode
  };

/* The mode in effect for this run.  */
static enum operating_mode operating_mode;
meillo@14 117
meillo@14 118 /* If true do not output lines containing no delimiter characters.
meillo@14 119 Otherwise, all such lines are printed. This option is valid only
meillo@14 120 with field mode. */
meillo@14 121 static bool suppress_non_delimited;
meillo@14 122
meillo@14 123 /* If true, print all bytes, characters, or fields _except_
meillo@14 124 those that were specified. */
meillo@14 125 static bool complement;
meillo@14 126
meillo@14 127 /* The delimiter character for field mode. */
meillo@14 128 static unsigned char delim;
meillo@14 129
meillo@14 130 /* True if the --output-delimiter=STRING option was specified. */
meillo@14 131 static bool output_delimiter_specified;
meillo@14 132
meillo@14 133 /* The length of output_delimiter_string. */
meillo@14 134 static size_t output_delimiter_length;
meillo@14 135
meillo@14 136 /* The output field separator string. Defaults to the 1-character
meillo@14 137 string consisting of the input delimiter. */
meillo@14 138 static char *output_delimiter_string;
meillo@14 139
meillo@14 140 /* True if we have ever read standard input. */
meillo@14 141 static bool have_read_stdin;
meillo@14 142
/* Long options without a short equivalent are identified by values
   that cannot be characters: numbering starts just above CHAR_MAX.  */
enum
  {
    OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
    COMPLEMENT_OPTION = OUTPUT_DELIMITER_OPTION + 1
  };
meillo@14 150
meillo@14 151 static struct option const longopts[] =
meillo@14 152 {
meillo@14 153 {"bytes", required_argument, NULL, 'b'},
meillo@14 154 {"characters", required_argument, NULL, 'c'},
meillo@14 155 {"fields", required_argument, NULL, 'f'},
meillo@14 156 {"delimiter", required_argument, NULL, 'd'},
meillo@14 157 {"only-delimited", no_argument, NULL, 's'},
meillo@14 158 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
meillo@14 159 {"complement", no_argument, NULL, COMPLEMENT_OPTION},
meillo@14 160 {GETOPT_HELP_OPTION_DECL},
meillo@14 161 {GETOPT_VERSION_OPTION_DECL},
meillo@14 162 {NULL, 0, NULL, 0}
meillo@14 163 };
meillo@14 164
/* Print help and terminate the program with exit status STATUS.

   When STATUS is not EXIT_SUCCESS only emit_try_help () is invoked;
   otherwise the synopsis, the option descriptions and the description
   of the LIST syntax are written to standard output, followed by
   emit_ancillary_info ().  In both cases the function ends with
   exit (STATUS) and therefore never returns to its caller.  */
meillo@14 165 void
meillo@14 166 usage (int status)
meillo@14 167 {
meillo@14 168 if (status != EXIT_SUCCESS)
meillo@14 169 emit_try_help ();
meillo@14 170 else
meillo@14 171 {
meillo@14 172 printf (_("\
meillo@14 173 Usage: %s OPTION... [FILE]...\n\
meillo@14 174 "),
meillo@14 175 program_name);
meillo@14 176 fputs (_("\
meillo@14 177 Print selected parts of lines from each FILE to standard output.\n\
meillo@14 178 "), stdout);
meillo@14 179
meillo@14 180 emit_stdin_note ();
meillo@14 181 emit_mandatory_arg_note ();
meillo@14 182
meillo@14 183 fputs (_("\
meillo@14 184 -b, --bytes=LIST select only these bytes\n\
meillo@14 185 -c, --characters=LIST select only these characters\n\
meillo@14 186 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
meillo@14 187 "), stdout);
meillo@14 188 fputs (_("\
meillo@14 189 -f, --fields=LIST select only these fields; also print any line\n\
meillo@14 190 that contains no delimiter character, unless\n\
meillo@14 191 the -s option is specified\n\
meillo@14 192 -n (ignored)\n\
meillo@14 193 "), stdout);
meillo@14 194 fputs (_("\
meillo@14 195 --complement complement the set of selected bytes, characters\n\
meillo@14 196 or fields\n\
meillo@14 197 "), stdout);
meillo@14 198 fputs (_("\
meillo@14 199 -s, --only-delimited do not print lines not containing delimiters\n\
meillo@14 200 --output-delimiter=STRING use STRING as the output delimiter\n\
meillo@14 201 the default is to use the input delimiter\n\
meillo@14 202 "), stdout);
meillo@14 203 fputs (HELP_OPTION_DESCRIPTION, stdout);
meillo@14 204 fputs (VERSION_OPTION_DESCRIPTION, stdout);
meillo@14 205 fputs (_("\
meillo@14 206 \n\
meillo@14 207 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
meillo@14 208 range, or many ranges separated by commas. Selected input is written\n\
meillo@14 209 in the same order that it is read, and is written exactly once.\n\
meillo@14 210 "), stdout);
meillo@14 211 fputs (_("\
meillo@14 212 Each range is one of:\n\
meillo@14 213 \n\
meillo@14 214 N N'th byte, character or field, counted from 1\n\
meillo@14 215 N- from N'th byte, character or field, to end of line\n\
meillo@14 216 N-M from N'th to M'th (included) byte, character or field\n\
meillo@14 217 -M from first to M'th (included) byte, character or field\n\
meillo@14 218 "), stdout);
meillo@14 219 emit_ancillary_info (PROGRAM_NAME);
meillo@14 220 }
meillo@14 221 exit (status);
meillo@14 222 }
meillo@14 223
meillo@14 224 /* Comparison function for qsort to order the list of
meillo@14 225 struct range_pairs. */
meillo@14 226 static int
meillo@14 227 compare_ranges (const void *a, const void *b)
meillo@14 228 {
meillo@14 229 int a_start = ((const struct range_pair *) a)->lo;
meillo@14 230 int b_start = ((const struct range_pair *) b)->lo;
meillo@14 231 return a_start < b_start ? -1 : a_start > b_start;
meillo@14 232 }
meillo@14 233
meillo@14 234 /* Reallocate Range Pair entries, with corresponding
meillo@14 235 entries outside the range of each specified entry. */
meillo@14 236
meillo@14 237 static void
meillo@14 238 complement_rp (void)
meillo@14 239 {
meillo@14 240 if (complement)
meillo@14 241 {
meillo@14 242 struct range_pair *c = rp;
meillo@14 243 size_t n = n_rp;
meillo@14 244 size_t i;
meillo@14 245
meillo@14 246 rp = NULL;
meillo@14 247 n_rp = 0;
meillo@14 248 n_rp_allocated = 0;
meillo@14 249
meillo@14 250 if (c[0].lo > 1)
meillo@14 251 add_range_pair (1, c[0].lo - 1);
meillo@14 252
meillo@14 253 for (i = 1; i < n; ++i)
meillo@14 254 {
meillo@14 255 if (c[i-1].hi + 1 == c[i].lo)
meillo@14 256 continue;
meillo@14 257
meillo@14 258 add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
meillo@14 259 }
meillo@14 260
meillo@14 261 if (c[n-1].hi < SIZE_MAX)
meillo@14 262 add_range_pair (c[n-1].hi + 1, SIZE_MAX);
meillo@14 263
meillo@14 264 free (c);
meillo@14 265 }
meillo@14 266 }
meillo@14 267
meillo@14 268 /* Given the list of field or byte range specifications FIELDSTR,
meillo@14 269 allocate and initialize the RP array. FIELDSTR should
meillo@14 270 be composed of one or more numbers or ranges of numbers, separated
meillo@14 271 by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
meillo@14 272 'n-' means 'n' through end of line.
meillo@14 273 Return true if FIELDSTR contains at least one field specification,
meillo@14 274 false otherwise. */
meillo@14 275
meillo@14 276 static bool
meillo@14 277 set_fields (const char *fieldstr)
meillo@14 278 {
meillo@14 279 size_t initial = 1; /* Value of first number in a range. */
meillo@14 280 size_t value = 0; /* If nonzero, a number being accumulated. */
meillo@14 281 bool lhs_specified = false;
meillo@14 282 bool rhs_specified = false;
meillo@14 283 bool dash_found = false; /* True if a '-' is found in this field. */
meillo@14 284 bool field_found = false; /* True if at least one field spec
meillo@14 285 has been processed. */
meillo@14 286
meillo@14 287 size_t i;
meillo@14 288 bool in_digits = false;
meillo@14 289
meillo@14 290 /* Collect and store in RP the range end points. */
meillo@14 291
meillo@14 292 while (true)
meillo@14 293 {
meillo@14 294 if (*fieldstr == '-')
meillo@14 295 {
meillo@14 296 in_digits = false;
meillo@14 297 /* Starting a range. */
meillo@14 298 if (dash_found)
meillo@14 299 FATAL_ERROR (_("invalid byte, character or field list"));
meillo@14 300 dash_found = true;
meillo@14 301 fieldstr++;
meillo@14 302
meillo@14 303 if (lhs_specified && !value)
meillo@14 304 FATAL_ERROR (_("fields and positions are numbered from 1"));
meillo@14 305
meillo@14 306 initial = (lhs_specified ? value : 1);
meillo@14 307 value = 0;
meillo@14 308 }
meillo@14 309 else if (*fieldstr == ','
meillo@14 310 || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
meillo@14 311 {
meillo@14 312 in_digits = false;
meillo@14 313 /* Ending the string, or this field/byte sublist. */
meillo@14 314 if (dash_found)
meillo@14 315 {
meillo@14 316 dash_found = false;
meillo@14 317
meillo@14 318 if (!lhs_specified && !rhs_specified)
meillo@14 319 FATAL_ERROR (_("invalid range with no endpoint: -"));
meillo@14 320
meillo@14 321 /* A range. Possibilities: -n, m-n, n-.
meillo@14 322 In any case, 'initial' contains the start of the range. */
meillo@14 323 if (!rhs_specified)
meillo@14 324 {
meillo@14 325 /* 'n-'. From 'initial' to end of line. */
meillo@14 326 add_range_pair (initial, SIZE_MAX);
meillo@14 327 field_found = true;
meillo@14 328 }
meillo@14 329 else
meillo@14 330 {
meillo@14 331 /* 'm-n' or '-n' (1-n). */
meillo@14 332 if (value < initial)
meillo@14 333 FATAL_ERROR (_("invalid decreasing range"));
meillo@14 334
meillo@14 335 add_range_pair (initial, value);
meillo@14 336 field_found = true;
meillo@14 337 }
meillo@14 338 value = 0;
meillo@14 339 }
meillo@14 340 else
meillo@14 341 {
meillo@14 342 /* A simple field number, not a range. */
meillo@14 343 if (value == 0)
meillo@14 344 FATAL_ERROR (_("fields and positions are numbered from 1"));
meillo@14 345 add_range_pair (value, value);
meillo@14 346 value = 0;
meillo@14 347 field_found = true;
meillo@14 348 }
meillo@14 349
meillo@14 350 if (*fieldstr == '\0')
meillo@14 351 break;
meillo@14 352
meillo@14 353 fieldstr++;
meillo@14 354 lhs_specified = false;
meillo@14 355 rhs_specified = false;
meillo@14 356 }
meillo@14 357 else if (ISDIGIT (*fieldstr))
meillo@14 358 {
meillo@14 359 /* Record beginning of digit string, in case we have to
meillo@14 360 complain about it. */
meillo@14 361 static char const *num_start;
meillo@14 362 if (!in_digits || !num_start)
meillo@14 363 num_start = fieldstr;
meillo@14 364 in_digits = true;
meillo@14 365
meillo@14 366 if (dash_found)
meillo@14 367 rhs_specified = 1;
meillo@14 368 else
meillo@14 369 lhs_specified = 1;
meillo@14 370
meillo@14 371 /* Detect overflow. */
meillo@14 372 if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
meillo@14 373 || value == SIZE_MAX)
meillo@14 374 {
meillo@14 375 /* In case the user specified -c$(echo 2^64|bc),22,
meillo@14 376 complain only about the first number. */
meillo@14 377 /* Determine the length of the offending number. */
meillo@14 378 size_t len = strspn (num_start, "0123456789");
meillo@14 379 char *bad_num = xstrndup (num_start, len);
meillo@14 380 if (operating_mode == byte_mode)
meillo@14 381 error (0, 0,
meillo@14 382 _("byte offset %s is too large"), quote (bad_num));
meillo@14 383 else
meillo@14 384 error (0, 0,
meillo@14 385 _("field number %s is too large"), quote (bad_num));
meillo@14 386 free (bad_num);
meillo@14 387 exit (EXIT_FAILURE);
meillo@14 388 }
meillo@14 389
meillo@14 390 fieldstr++;
meillo@14 391 }
meillo@14 392 else
meillo@14 393 FATAL_ERROR (_("invalid byte, character or field list"));
meillo@14 394 }
meillo@14 395
meillo@14 396 qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
meillo@14 397
meillo@14 398 /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
meillo@14 399 for (i = 0; i < n_rp; ++i)
meillo@14 400 {
meillo@14 401 for (size_t j = i + 1; j < n_rp; ++j)
meillo@14 402 {
meillo@14 403 if (rp[j].lo <= rp[i].hi)
meillo@14 404 {
meillo@14 405 rp[i].hi = MAX (rp[j].hi, rp[i].hi);
meillo@14 406 memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
meillo@14 407 n_rp--;
meillo@14 408 j--;
meillo@14 409 }
meillo@14 410 else
meillo@14 411 break;
meillo@14 412 }
meillo@14 413 }
meillo@14 414
meillo@14 415 complement_rp ();
meillo@14 416
meillo@14 417 /* After merging, reallocate RP so we release memory to the system.
meillo@14 418 Also add a sentinel at the end of RP, to avoid out of bounds access
meillo@14 419 and for performance reasons. */
meillo@14 420 ++n_rp;
meillo@14 421 rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
meillo@14 422 rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
meillo@14 423
meillo@14 424 return field_found;
meillo@14 425 }
meillo@14 426
meillo@14 427 /* Increment *ITEM_IDX (i.e., a field or byte index),
meillo@14 428 and if required CURRENT_RP. */
meillo@14 429
meillo@14 430 static inline void
meillo@14 431 next_item (size_t *item_idx)
meillo@14 432 {
meillo@14 433 (*item_idx)++;
meillo@14 434 if ((*item_idx) > current_rp->hi)
meillo@14 435 current_rp++;
meillo@14 436 }
meillo@14 437
meillo@14 438 /* Return nonzero if the K'th field or byte is printable. */
meillo@14 439
meillo@14 440 static inline bool
meillo@14 441 print_kth (size_t k)
meillo@14 442 {
meillo@14 443 return current_rp->lo <= k;
meillo@14 444 }
meillo@14 445
meillo@14 446 /* Return nonzero if K'th byte is the beginning of a range. */
meillo@14 447
meillo@14 448 static inline bool
meillo@14 449 is_range_start_index (size_t k)
meillo@14 450 {
meillo@14 451 return k == current_rp->lo;
meillo@14 452 }
meillo@14 453
meillo@14 454 /* Read from stream STREAM, printing to standard output any selected bytes. */
meillo@14 455
meillo@14 456 static void
meillo@14 457 cut_bytes (FILE *stream)
meillo@14 458 {
meillo@14 459 size_t byte_idx; /* Number of bytes in the line so far. */
meillo@14 460 /* Whether to begin printing delimiters between ranges for the current line.
meillo@14 461 Set after we've begun printing data corresponding to the first range. */
meillo@14 462 bool print_delimiter;
meillo@14 463
meillo@14 464 byte_idx = 0;
meillo@14 465 print_delimiter = false;
meillo@14 466 current_rp = rp;
meillo@14 467 while (true)
meillo@14 468 {
meillo@14 469 int c; /* Each character from the file. */
meillo@14 470
meillo@14 471 c = getc (stream);
meillo@14 472
meillo@14 473 if (c == '\n')
meillo@14 474 {
meillo@14 475 putchar ('\n');
meillo@14 476 byte_idx = 0;
meillo@14 477 print_delimiter = false;
meillo@14 478 current_rp = rp;
meillo@14 479 }
meillo@14 480 else if (c == EOF)
meillo@14 481 {
meillo@14 482 if (byte_idx > 0)
meillo@14 483 putchar ('\n');
meillo@14 484 break;
meillo@14 485 }
meillo@14 486 else
meillo@14 487 {
meillo@14 488 next_item (&byte_idx);
meillo@14 489 if (print_kth (byte_idx))
meillo@14 490 {
meillo@14 491 if (output_delimiter_specified)
meillo@14 492 {
meillo@14 493 if (print_delimiter && is_range_start_index (byte_idx))
meillo@14 494 {
meillo@14 495 fwrite (output_delimiter_string, sizeof (char),
meillo@14 496 output_delimiter_length, stdout);
meillo@14 497 }
meillo@14 498 print_delimiter = true;
meillo@14 499 }
meillo@14 500
meillo@14 501 putchar (c);
meillo@14 502 }
meillo@14 503 }
meillo@14 504 }
meillo@14 505 }
meillo@14 506
meillo@14 507 /* Read from stream STREAM, printing to standard output any selected fields. */
meillo@14 508
meillo@14 509 static void
meillo@14 510 cut_fields (FILE *stream)
meillo@14 511 {
meillo@14 512 int c;
meillo@14 513 size_t field_idx = 1;
meillo@14 514 bool found_any_selected_field = false;
meillo@14 515 bool buffer_first_field;
meillo@14 516
meillo@14 517 current_rp = rp;
meillo@14 518
meillo@14 519 c = getc (stream);
meillo@14 520 if (c == EOF)
meillo@14 521 return;
meillo@14 522
meillo@14 523 ungetc (c, stream);
meillo@14 524 c = 0;
meillo@14 525
meillo@14 526 /* To support the semantics of the -s flag, we may have to buffer
meillo@14 527 all of the first field to determine whether it is 'delimited.'
meillo@14 528 But that is unnecessary if all non-delimited lines must be printed
meillo@14 529 and the first field has been selected, or if non-delimited lines
meillo@14 530 must be suppressed and the first field has *not* been selected.
meillo@14 531 That is because a non-delimited line has exactly one field. */
meillo@14 532 buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
meillo@14 533
meillo@14 534 while (1)
meillo@14 535 {
meillo@14 536 if (field_idx == 1 && buffer_first_field)
meillo@14 537 {
meillo@14 538 ssize_t len;
meillo@14 539 size_t n_bytes;
meillo@14 540
meillo@14 541 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
meillo@14 542 GETNLINE_NO_LIMIT, delim, '\n', stream);
meillo@14 543 if (len < 0)
meillo@14 544 {
meillo@14 545 free (field_1_buffer);
meillo@14 546 field_1_buffer = NULL;
meillo@14 547 if (ferror (stream) || feof (stream))
meillo@14 548 break;
meillo@14 549 xalloc_die ();
meillo@14 550 }
meillo@14 551
meillo@14 552 n_bytes = len;
meillo@14 553 assert (n_bytes != 0);
meillo@14 554
meillo@14 555 c = 0;
meillo@14 556
meillo@14 557 /* If the first field extends to the end of line (it is not
meillo@14 558 delimited) and we are printing all non-delimited lines,
meillo@14 559 print this one. */
meillo@14 560 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
meillo@14 561 {
meillo@14 562 if (suppress_non_delimited)
meillo@14 563 {
meillo@14 564 /* Empty. */
meillo@14 565 }
meillo@14 566 else
meillo@14 567 {
meillo@14 568 fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
meillo@14 569 /* Make sure the output line is newline terminated. */
meillo@14 570 if (field_1_buffer[n_bytes - 1] != '\n')
meillo@14 571 putchar ('\n');
meillo@14 572 c = '\n';
meillo@14 573 }
meillo@14 574 continue;
meillo@14 575 }
meillo@14 576 if (print_kth (1))
meillo@14 577 {
meillo@14 578 /* Print the field, but not the trailing delimiter. */
meillo@14 579 fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
meillo@14 580
meillo@14 581 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
meillo@14 582 if (delim == '\n')
meillo@14 583 {
meillo@14 584 int last_c = getc (stream);
meillo@14 585 if (last_c != EOF)
meillo@14 586 {
meillo@14 587 ungetc (last_c, stream);
meillo@14 588 found_any_selected_field = true;
meillo@14 589 }
meillo@14 590 }
meillo@14 591 else
meillo@14 592 found_any_selected_field = true;
meillo@14 593 }
meillo@14 594 next_item (&field_idx);
meillo@14 595 }
meillo@14 596
meillo@14 597 int prev_c = c;
meillo@14 598
meillo@14 599 if (print_kth (field_idx))
meillo@14 600 {
meillo@14 601 if (found_any_selected_field)
meillo@14 602 {
meillo@14 603 fwrite (output_delimiter_string, sizeof (char),
meillo@14 604 output_delimiter_length, stdout);
meillo@14 605 }
meillo@14 606 found_any_selected_field = true;
meillo@14 607
meillo@14 608 while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
meillo@14 609 {
meillo@14 610 putchar (c);
meillo@14 611 prev_c = c;
meillo@14 612 }
meillo@14 613 }
meillo@14 614 else
meillo@14 615 {
meillo@14 616 while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
meillo@14 617 {
meillo@14 618 prev_c = c;
meillo@14 619 }
meillo@14 620 }
meillo@14 621
meillo@14 622 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
meillo@14 623 if (delim == '\n' && c == delim)
meillo@14 624 {
meillo@14 625 int last_c = getc (stream);
meillo@14 626 if (last_c != EOF)
meillo@14 627 ungetc (last_c, stream);
meillo@14 628 else
meillo@14 629 c = last_c;
meillo@14 630 }
meillo@14 631
meillo@14 632 if (c == delim)
meillo@14 633 next_item (&field_idx);
meillo@14 634 else if (c == '\n' || c == EOF)
meillo@14 635 {
meillo@14 636 if (found_any_selected_field
meillo@14 637 || !(suppress_non_delimited && field_idx == 1))
meillo@14 638 {
meillo@14 639 if (c == '\n' || prev_c != '\n' || delim == '\n')
meillo@14 640 putchar ('\n');
meillo@14 641 }
meillo@14 642 if (c == EOF)
meillo@14 643 break;
meillo@14 644 field_idx = 1;
meillo@14 645 current_rp = rp;
meillo@14 646 found_any_selected_field = false;
meillo@14 647 }
meillo@14 648 }
meillo@14 649 }
meillo@14 650
meillo@14 651 static void
meillo@14 652 cut_stream (FILE *stream)
meillo@14 653 {
meillo@14 654 if (operating_mode == byte_mode)
meillo@14 655 cut_bytes (stream);
meillo@14 656 else
meillo@14 657 cut_fields (stream);
meillo@14 658 }
meillo@14 659
meillo@14 660 /* Process file FILE to standard output.
meillo@14 661 Return true if successful. */
meillo@14 662
meillo@14 663 static bool
meillo@14 664 cut_file (char const *file)
meillo@14 665 {
meillo@14 666 FILE *stream;
meillo@14 667
meillo@14 668 if (STREQ (file, "-"))
meillo@14 669 {
meillo@14 670 have_read_stdin = true;
meillo@14 671 stream = stdin;
meillo@14 672 }
meillo@14 673 else
meillo@14 674 {
meillo@14 675 stream = fopen (file, "r");
meillo@14 676 if (stream == NULL)
meillo@14 677 {
meillo@14 678 error (0, errno, "%s", file);
meillo@14 679 return false;
meillo@14 680 }
meillo@14 681 }
meillo@14 682
meillo@14 683 fadvise (stream, FADVISE_SEQUENTIAL);
meillo@14 684
meillo@14 685 cut_stream (stream);
meillo@14 686
meillo@14 687 if (ferror (stream))
meillo@14 688 {
meillo@14 689 error (0, errno, "%s", file);
meillo@14 690 return false;
meillo@14 691 }
meillo@14 692 if (STREQ (file, "-"))
meillo@14 693 clearerr (stream); /* Also clear EOF. */
meillo@14 694 else if (fclose (stream) == EOF)
meillo@14 695 {
meillo@14 696 error (0, errno, "%s", file);
meillo@14 697 return false;
meillo@14 698 }
meillo@14 699 return true;
meillo@14 700 }
meillo@14 701
meillo@14 702 int
meillo@14 703 main (int argc, char **argv)
meillo@14 704 {
meillo@14 705 int optc;
meillo@14 706 bool ok;
meillo@14 707 bool delim_specified = false;
meillo@14 708 char *spec_list_string IF_LINT ( = NULL);
meillo@14 709
meillo@14 710 initialize_main (&argc, &argv);
meillo@14 711 set_program_name (argv[0]);
meillo@14 712 setlocale (LC_ALL, "");
meillo@14 713 bindtextdomain (PACKAGE, LOCALEDIR);
meillo@14 714 textdomain (PACKAGE);
meillo@14 715
meillo@14 716 atexit (close_stdout);
meillo@14 717
meillo@14 718 operating_mode = undefined_mode;
meillo@14 719
meillo@14 720 /* By default, all non-delimited lines are printed. */
meillo@14 721 suppress_non_delimited = false;
meillo@14 722
meillo@14 723 delim = '\0';
meillo@14 724 have_read_stdin = false;
meillo@14 725
meillo@14 726 while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
meillo@14 727 {
meillo@14 728 switch (optc)
meillo@14 729 {
meillo@14 730 case 'b':
meillo@14 731 case 'c':
meillo@14 732 /* Build the byte list. */
meillo@14 733 if (operating_mode != undefined_mode)
meillo@14 734 FATAL_ERROR (_("only one type of list may be specified"));
meillo@14 735 operating_mode = byte_mode;
meillo@14 736 spec_list_string = optarg;
meillo@14 737 break;
meillo@14 738
meillo@14 739 case 'f':
meillo@14 740 /* Build the field list. */
meillo@14 741 if (operating_mode != undefined_mode)
meillo@14 742 FATAL_ERROR (_("only one type of list may be specified"));
meillo@14 743 operating_mode = field_mode;
meillo@14 744 spec_list_string = optarg;
meillo@14 745 break;
meillo@14 746
meillo@14 747 case 'd':
meillo@14 748 /* New delimiter. */
meillo@14 749 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
meillo@14 750 if (optarg[0] != '\0' && optarg[1] != '\0')
meillo@14 751 FATAL_ERROR (_("the delimiter must be a single character"));
meillo@14 752 delim = optarg[0];
meillo@14 753 delim_specified = true;
meillo@14 754 break;
meillo@14 755
meillo@14 756 case OUTPUT_DELIMITER_OPTION:
meillo@14 757 output_delimiter_specified = true;
meillo@14 758 /* Interpret --output-delimiter='' to mean
meillo@14 759 'use the NUL byte as the delimiter.' */
meillo@14 760 output_delimiter_length = (optarg[0] == '\0'
meillo@14 761 ? 1 : strlen (optarg));
meillo@14 762 output_delimiter_string = xstrdup (optarg);
meillo@14 763 break;
meillo@14 764
meillo@14 765 case 'n':
meillo@14 766 break;
meillo@14 767
meillo@14 768 case 's':
meillo@14 769 suppress_non_delimited = true;
meillo@14 770 break;
meillo@14 771
meillo@14 772 case COMPLEMENT_OPTION:
meillo@14 773 complement = true;
meillo@14 774 break;
meillo@14 775
meillo@14 776 case_GETOPT_HELP_CHAR;
meillo@14 777
meillo@14 778 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
meillo@14 779
meillo@14 780 default:
meillo@14 781 usage (EXIT_FAILURE);
meillo@14 782 }
meillo@14 783 }
meillo@14 784
meillo@14 785 if (operating_mode == undefined_mode)
meillo@14 786 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
meillo@14 787
meillo@14 788 if (delim_specified && operating_mode != field_mode)
meillo@14 789 FATAL_ERROR (_("an input delimiter may be specified only\
meillo@14 790 when operating on fields"));
meillo@14 791
meillo@14 792 if (suppress_non_delimited && operating_mode != field_mode)
meillo@14 793 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
meillo@14 794 \tonly when operating on fields"));
meillo@14 795
meillo@14 796 if (! set_fields (spec_list_string))
meillo@14 797 {
meillo@14 798 if (operating_mode == field_mode)
meillo@14 799 FATAL_ERROR (_("missing list of fields"));
meillo@14 800 else
meillo@14 801 FATAL_ERROR (_("missing list of positions"));
meillo@14 802 }
meillo@14 803
meillo@14 804 if (!delim_specified)
meillo@14 805 delim = '\t';
meillo@14 806
meillo@14 807 if (output_delimiter_string == NULL)
meillo@14 808 {
meillo@14 809 static char dummy[2];
meillo@14 810 dummy[0] = delim;
meillo@14 811 dummy[1] = '\0';
meillo@14 812 output_delimiter_string = dummy;
meillo@14 813 output_delimiter_length = 1;
meillo@14 814 }
meillo@14 815
meillo@14 816 if (optind == argc)
meillo@14 817 ok = cut_file ("-");
meillo@14 818 else
meillo@14 819 for (ok = true; optind < argc; optind++)
meillo@14 820 ok &= cut_file (argv[optind]);
meillo@14 821
meillo@14 822
meillo@14 823 if (have_read_stdin && fclose (stdin) == EOF)
meillo@14 824 {
meillo@14 825 error (0, errno, "-");
meillo@14 826 ok = false;
meillo@14 827 }
meillo@14 828
meillo@14 829 return ok ? EXIT_SUCCESS : EXIT_FAILURE;
meillo@14 830 }