docs/cut

diff code/cut.c__gnu.2015-05-01 @ 14:21ad1c1548c4

Code ausgewaehlter Implementierungen eingefuegt. Das Datum entspricht dem Dateiaenderungsdatum.
author markus schnalke <meillo@marmaro.de>
date Tue, 12 May 2015 06:46:59 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/code/cut.c__gnu.2015-05-01	Tue May 12 06:46:59 2015 +0200
     1.3 @@ -0,0 +1,830 @@
     1.4 +/* cut - remove parts of lines of files
     1.5 +   Copyright (C) 1997-2015 Free Software Foundation, Inc.
     1.6 +   Copyright (C) 1984 David M. Ihnat
     1.7 +
     1.8 +   This program is free software: you can redistribute it and/or modify
     1.9 +   it under the terms of the GNU General Public License as published by
    1.10 +   the Free Software Foundation, either version 3 of the License, or
    1.11 +   (at your option) any later version.
    1.12 +
    1.13 +   This program is distributed in the hope that it will be useful,
    1.14 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
    1.15 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    1.16 +   GNU General Public License for more details.
    1.17 +
    1.18 +   You should have received a copy of the GNU General Public License
    1.19 +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
    1.20 +
    1.21 +/* Written by David Ihnat.  */
    1.22 +
    1.23 +/* POSIX changes, bug fixes, long-named options, and cleanup
    1.24 +   by David MacKenzie <djm@gnu.ai.mit.edu>.
    1.25 +
    1.26 +   Rewrite cut_fields and cut_bytes -- Jim Meyering.  */
    1.27 +
    1.28 +#include <config.h>
    1.29 +
    1.30 +#include <stdio.h>
    1.31 +#include <assert.h>
    1.32 +#include <getopt.h>
    1.33 +#include <sys/types.h>
    1.34 +#include "system.h"
    1.35 +
    1.36 +#include "error.h"
    1.37 +#include "fadvise.h"
    1.38 +#include "getndelim2.h"
    1.39 +#include "hash.h"
    1.40 +#include "quote.h"
    1.41 +#include "xstrndup.h"
    1.42 +
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cut"

/* The author list.  main passes it, together with PROGRAM_NAME, to
   case_GETOPT_VERSION_CHAR.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")
    1.50 +
/* Report MESSAGE through error () and then call usage (EXIT_FAILURE).
   usage finishes with exit (), so control never continues past this
   macro.  The do/while (0) wrapper lets it be used as a single statement,
   e.g. as the body of an unbraced 'if'.  */
#define FATAL_ERROR(Message)						\
  do									\
    {									\
      error (0, 0, (Message));						\
      usage (EXIT_FAILURE);						\
    }									\
  while (0)
    1.58 +
    1.59 +
/* One selected range of byte or field positions.  Positions are numbered
   from 1 and both endpoints belong to the range.  set_fields stores
   SIZE_MAX in HI for an open-ended range such as 'N-'.  */
struct range_pair
  {
    size_t lo;	/* First position of the range.  */
    size_t hi;	/* Last position of the range.  */
  };
    1.65 +
/* Array of 'struct range_pair' holding the selected ranges.  Once
   set_fields has finished, the array is sorted by LO, overlapping entries
   have been merged, and the last entry is a sentinel whose LO and HI are
   both SIZE_MAX.  */
static struct range_pair *rp;

/* Pointer inside RP.  When checking if a byte or field is selected
   by a finite range, we check if it is between CURRENT_RP.LO
   and CURRENT_RP.HI.  If the byte or field index is greater than
   CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair.
   cut_bytes and cut_fields reset it to RP at the start of every line.  */
static struct range_pair *current_rp;

/* Number of entries in use in RP.  While the list is being built this is
   the number of ranges added so far; set_fields finally increments it
   once more so that it also counts the sentinel entry.  */
static size_t n_rp;

/* Number of 'struct range_pair's allocated.  Updated by X2NREALLOC in
   add_range_pair and reset to 0 by complement_rp.  */
static size_t n_rp_allocated;
    1.80 +
    1.81 +
    1.82 +/* Append LOW, HIGH to the list RP of range pairs, allocating additional
    1.83 +   space if necessary.  Update global variable N_RP.  When allocating,
    1.84 +   update global variable N_RP_ALLOCATED.  */
    1.85 +
    1.86 +static void
    1.87 +add_range_pair (size_t lo, size_t hi)
    1.88 +{
    1.89 +  if (n_rp == n_rp_allocated)
    1.90 +    rp = X2NREALLOC (rp, &n_rp_allocated);
    1.91 +  rp[n_rp].lo = lo;
    1.92 +  rp[n_rp].hi = hi;
    1.93 +  ++n_rp;
    1.94 +}
    1.95 +
/* This buffer is used to support the semantics of the -s option
   (or lack of same) when the specified field list includes (does
   not include) the first field.  In both of those cases, the entire
   first field must be read into this buffer to determine whether it
   is followed by a delimiter or a newline before any of it may be
   output.  Otherwise, cut_fields can do the job without using this
   buffer.  cut_fields fills it through getndelim2 and frees it (resetting
   the pointer to NULL) when that call reports failure.  */
static char *field_1_buffer;

/* The number of bytes allocated for FIELD_1_BUFFER.  Passed to getndelim2
   alongside the buffer pointer.  */
static size_t field_1_bufsize;
   1.107 +
/* Which kind of list was given on the command line.  */
enum operating_mode
  {
    /* No -b, -c or -f option has been seen yet.  */
    undefined_mode,

    /* Output characters that are in the given bytes.  main selects this
       mode for both -b and -c.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  Selected by -f.  */
    field_mode
  };

/* The mode chosen by main while parsing the options.  */
static enum operating_mode operating_mode;
   1.120 +
/* If true do not output lines containing no delimiter characters.
   Otherwise, all such lines are printed.  This option is valid only
   with field mode.  Set by -s.  */
static bool suppress_non_delimited;

/* If true, print all bytes, characters, or fields _except_
   those that were specified.  Set by --complement and consulted by
   complement_rp.  */
static bool complement;

/* The delimiter character for field mode.  Taken from -d; main falls
   back to TAB when -d was not given.  */
static unsigned char delim;

/* True if the --output-delimiter=STRING option was specified.  */
static bool output_delimiter_specified;

/* The length of output_delimiter_string.  It is 1 for an empty STRING so
   that the single NUL byte is written.  */
static size_t output_delimiter_length;

/* The output field separator string.  Defaults to the 1-character
   string consisting of the input delimiter.  */
static char *output_delimiter_string;

/* True if we have ever read standard input.  main uses it to decide
   whether stdin must be closed before exiting.  */
static bool have_read_stdin;
   1.145 +
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These are the values getopt_long returns for --output-delimiter and
   --complement respectively.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION
};
   1.153 +
/* Long option table handed to getopt_long in main.  Every entry returns
   either the matching short option letter or one of the pseudo short
   options above.  The short option -n has no long form.  The table ends
   with an all-zero entry.  */
static struct option const longopts[] =
{
  {"bytes", required_argument, NULL, 'b'},
  {"characters", required_argument, NULL, 'c'},
  {"fields", required_argument, NULL, 'f'},
  {"delimiter", required_argument, NULL, 'd'},
  {"only-delimited", no_argument, NULL, 's'},
  {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
  {"complement", no_argument, NULL, COMPLEMENT_OPTION},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
};
   1.167 +
   1.168 +void
   1.169 +usage (int status)
   1.170 +{
   1.171 +  if (status != EXIT_SUCCESS)
   1.172 +    emit_try_help ();
   1.173 +  else
   1.174 +    {
   1.175 +      printf (_("\
   1.176 +Usage: %s OPTION... [FILE]...\n\
   1.177 +"),
   1.178 +              program_name);
   1.179 +      fputs (_("\
   1.180 +Print selected parts of lines from each FILE to standard output.\n\
   1.181 +"), stdout);
   1.182 +
   1.183 +      emit_stdin_note ();
   1.184 +      emit_mandatory_arg_note ();
   1.185 +
   1.186 +      fputs (_("\
   1.187 +  -b, --bytes=LIST        select only these bytes\n\
   1.188 +  -c, --characters=LIST   select only these characters\n\
   1.189 +  -d, --delimiter=DELIM   use DELIM instead of TAB for field delimiter\n\
   1.190 +"), stdout);
   1.191 +      fputs (_("\
   1.192 +  -f, --fields=LIST       select only these fields;  also print any line\n\
   1.193 +                            that contains no delimiter character, unless\n\
   1.194 +                            the -s option is specified\n\
   1.195 +  -n                      (ignored)\n\
   1.196 +"), stdout);
   1.197 +      fputs (_("\
   1.198 +      --complement        complement the set of selected bytes, characters\n\
   1.199 +                            or fields\n\
   1.200 +"), stdout);
   1.201 +      fputs (_("\
   1.202 +  -s, --only-delimited    do not print lines not containing delimiters\n\
   1.203 +      --output-delimiter=STRING  use STRING as the output delimiter\n\
   1.204 +                            the default is to use the input delimiter\n\
   1.205 +"), stdout);
   1.206 +      fputs (HELP_OPTION_DESCRIPTION, stdout);
   1.207 +      fputs (VERSION_OPTION_DESCRIPTION, stdout);
   1.208 +      fputs (_("\
   1.209 +\n\
   1.210 +Use one, and only one of -b, -c or -f.  Each LIST is made up of one\n\
   1.211 +range, or many ranges separated by commas.  Selected input is written\n\
   1.212 +in the same order that it is read, and is written exactly once.\n\
   1.213 +"), stdout);
   1.214 +      fputs (_("\
   1.215 +Each range is one of:\n\
   1.216 +\n\
   1.217 +  N     N'th byte, character or field, counted from 1\n\
   1.218 +  N-    from N'th byte, character or field, to end of line\n\
   1.219 +  N-M   from N'th to M'th (included) byte, character or field\n\
   1.220 +  -M    from first to M'th (included) byte, character or field\n\
   1.221 +"), stdout);
   1.222 +      emit_ancillary_info (PROGRAM_NAME);
   1.223 +    }
   1.224 +  exit (status);
   1.225 +}
   1.226 +
   1.227 +/* Comparison function for qsort to order the list of
   1.228 +   struct range_pairs.  */
   1.229 +static int
   1.230 +compare_ranges (const void *a, const void *b)
   1.231 +{
   1.232 +  int a_start = ((const struct range_pair *) a)->lo;
   1.233 +  int b_start = ((const struct range_pair *) b)->lo;
   1.234 +  return a_start < b_start ? -1 : a_start > b_start;
   1.235 +}
   1.236 +
   1.237 +/* Reallocate Range Pair entries, with corresponding
   1.238 +   entries outside the range of each specified entry.  */
   1.239 +
   1.240 +static void
   1.241 +complement_rp (void)
   1.242 +{
   1.243 +  if (complement)
   1.244 +    {
   1.245 +      struct range_pair *c = rp;
   1.246 +      size_t n = n_rp;
   1.247 +      size_t i;
   1.248 +
   1.249 +      rp = NULL;
   1.250 +      n_rp = 0;
   1.251 +      n_rp_allocated = 0;
   1.252 +
   1.253 +      if (c[0].lo > 1)
   1.254 +        add_range_pair (1, c[0].lo - 1);
   1.255 +
   1.256 +      for (i = 1; i < n; ++i)
   1.257 +        {
   1.258 +          if (c[i-1].hi + 1 == c[i].lo)
   1.259 +            continue;
   1.260 +
   1.261 +          add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
   1.262 +        }
   1.263 +
   1.264 +      if (c[n-1].hi < SIZE_MAX)
   1.265 +        add_range_pair (c[n-1].hi + 1, SIZE_MAX);
   1.266 +
   1.267 +      free (c);
   1.268 +    }
   1.269 +}
   1.270 +
   1.271 +/* Given the list of field or byte range specifications FIELDSTR,
   1.272 +   allocate and initialize the RP array. FIELDSTR should
   1.273 +   be composed of one or more numbers or ranges of numbers, separated
   1.274 +   by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
   1.275 +   'n-' means 'n' through end of line.
   1.276 +   Return true if FIELDSTR contains at least one field specification,
   1.277 +   false otherwise.  */
   1.278 +
   1.279 +static bool
   1.280 +set_fields (const char *fieldstr)
   1.281 +{
   1.282 +  size_t initial = 1;		/* Value of first number in a range.  */
   1.283 +  size_t value = 0;		/* If nonzero, a number being accumulated.  */
   1.284 +  bool lhs_specified = false;
   1.285 +  bool rhs_specified = false;
   1.286 +  bool dash_found = false;	/* True if a '-' is found in this field.  */
   1.287 +  bool field_found = false;	/* True if at least one field spec
   1.288 +                                   has been processed.  */
   1.289 +
   1.290 +  size_t i;
   1.291 +  bool in_digits = false;
   1.292 +
   1.293 +  /* Collect and store in RP the range end points. */
   1.294 +
   1.295 +  while (true)
   1.296 +    {
   1.297 +      if (*fieldstr == '-')
   1.298 +        {
   1.299 +          in_digits = false;
   1.300 +          /* Starting a range. */
   1.301 +          if (dash_found)
   1.302 +            FATAL_ERROR (_("invalid byte, character or field list"));
   1.303 +          dash_found = true;
   1.304 +          fieldstr++;
   1.305 +
   1.306 +          if (lhs_specified && !value)
   1.307 +            FATAL_ERROR (_("fields and positions are numbered from 1"));
   1.308 +
   1.309 +          initial = (lhs_specified ? value : 1);
   1.310 +          value = 0;
   1.311 +        }
   1.312 +      else if (*fieldstr == ','
   1.313 +               || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
   1.314 +        {
   1.315 +          in_digits = false;
   1.316 +          /* Ending the string, or this field/byte sublist. */
   1.317 +          if (dash_found)
   1.318 +            {
   1.319 +              dash_found = false;
   1.320 +
   1.321 +              if (!lhs_specified && !rhs_specified)
   1.322 +                FATAL_ERROR (_("invalid range with no endpoint: -"));
   1.323 +
   1.324 +              /* A range.  Possibilities: -n, m-n, n-.
   1.325 +                 In any case, 'initial' contains the start of the range. */
   1.326 +              if (!rhs_specified)
   1.327 +                {
   1.328 +                  /* 'n-'.  From 'initial' to end of line. */
   1.329 +                  add_range_pair (initial, SIZE_MAX);
   1.330 +                  field_found = true;
   1.331 +                }
   1.332 +              else
   1.333 +                {
   1.334 +                  /* 'm-n' or '-n' (1-n). */
   1.335 +                  if (value < initial)
   1.336 +                    FATAL_ERROR (_("invalid decreasing range"));
   1.337 +
   1.338 +                  add_range_pair (initial, value);
   1.339 +                  field_found = true;
   1.340 +                }
   1.341 +              value = 0;
   1.342 +            }
   1.343 +          else
   1.344 +            {
   1.345 +              /* A simple field number, not a range. */
   1.346 +              if (value == 0)
   1.347 +                FATAL_ERROR (_("fields and positions are numbered from 1"));
   1.348 +              add_range_pair (value, value);
   1.349 +              value = 0;
   1.350 +              field_found = true;
   1.351 +            }
   1.352 +
   1.353 +          if (*fieldstr == '\0')
   1.354 +            break;
   1.355 +
   1.356 +          fieldstr++;
   1.357 +          lhs_specified = false;
   1.358 +          rhs_specified = false;
   1.359 +        }
   1.360 +      else if (ISDIGIT (*fieldstr))
   1.361 +        {
   1.362 +          /* Record beginning of digit string, in case we have to
   1.363 +             complain about it.  */
   1.364 +          static char const *num_start;
   1.365 +          if (!in_digits || !num_start)
   1.366 +            num_start = fieldstr;
   1.367 +          in_digits = true;
   1.368 +
   1.369 +          if (dash_found)
   1.370 +            rhs_specified = 1;
   1.371 +          else
   1.372 +            lhs_specified = 1;
   1.373 +
   1.374 +          /* Detect overflow.  */
   1.375 +          if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
   1.376 +              || value == SIZE_MAX)
   1.377 +            {
   1.378 +              /* In case the user specified -c$(echo 2^64|bc),22,
   1.379 +                 complain only about the first number.  */
   1.380 +              /* Determine the length of the offending number.  */
   1.381 +              size_t len = strspn (num_start, "0123456789");
   1.382 +              char *bad_num = xstrndup (num_start, len);
   1.383 +              if (operating_mode == byte_mode)
   1.384 +                error (0, 0,
   1.385 +                       _("byte offset %s is too large"), quote (bad_num));
   1.386 +              else
   1.387 +                error (0, 0,
   1.388 +                       _("field number %s is too large"), quote (bad_num));
   1.389 +              free (bad_num);
   1.390 +              exit (EXIT_FAILURE);
   1.391 +            }
   1.392 +
   1.393 +          fieldstr++;
   1.394 +        }
   1.395 +      else
   1.396 +        FATAL_ERROR (_("invalid byte, character or field list"));
   1.397 +    }
   1.398 +
   1.399 +  qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
   1.400 +
   1.401 +  /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */
   1.402 +  for (i = 0; i < n_rp; ++i)
   1.403 +    {
   1.404 +      for (size_t j = i + 1; j < n_rp; ++j)
   1.405 +        {
   1.406 +          if (rp[j].lo <= rp[i].hi)
   1.407 +            {
   1.408 +              rp[i].hi = MAX (rp[j].hi, rp[i].hi);
   1.409 +              memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp);
   1.410 +              n_rp--;
   1.411 +              j--;
   1.412 +            }
   1.413 +          else
   1.414 +            break;
   1.415 +        }
   1.416 +    }
   1.417 +
   1.418 +  complement_rp ();
   1.419 +
   1.420 +  /* After merging, reallocate RP so we release memory to the system.
   1.421 +     Also add a sentinel at the end of RP, to avoid out of bounds access
   1.422 +     and for performance reasons.  */
   1.423 +  ++n_rp;
   1.424 +  rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
   1.425 +  rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX;
   1.426 +
   1.427 +  return field_found;
   1.428 +}
   1.429 +
   1.430 +/* Increment *ITEM_IDX (i.e., a field or byte index),
   1.431 +   and if required CURRENT_RP.  */
   1.432 +
   1.433 +static inline void
   1.434 +next_item (size_t *item_idx)
   1.435 +{
   1.436 +  (*item_idx)++;
   1.437 +  if ((*item_idx) > current_rp->hi)
   1.438 +    current_rp++;
   1.439 +}
   1.440 +
   1.441 +/* Return nonzero if the K'th field or byte is printable. */
   1.442 +
   1.443 +static inline bool
   1.444 +print_kth (size_t k)
   1.445 +{
   1.446 +  return current_rp->lo <= k;
   1.447 +}
   1.448 +
   1.449 +/* Return nonzero if K'th byte is the beginning of a range. */
   1.450 +
   1.451 +static inline bool
   1.452 +is_range_start_index (size_t k)
   1.453 +{
   1.454 +  return k == current_rp->lo;
   1.455 +}
   1.456 +
   1.457 +/* Read from stream STREAM, printing to standard output any selected bytes.  */
   1.458 +
   1.459 +static void
   1.460 +cut_bytes (FILE *stream)
   1.461 +{
   1.462 +  size_t byte_idx;	/* Number of bytes in the line so far. */
   1.463 +  /* Whether to begin printing delimiters between ranges for the current line.
   1.464 +     Set after we've begun printing data corresponding to the first range.  */
   1.465 +  bool print_delimiter;
   1.466 +
   1.467 +  byte_idx = 0;
   1.468 +  print_delimiter = false;
   1.469 +  current_rp = rp;
   1.470 +  while (true)
   1.471 +    {
   1.472 +      int c;		/* Each character from the file. */
   1.473 +
   1.474 +      c = getc (stream);
   1.475 +
   1.476 +      if (c == '\n')
   1.477 +        {
   1.478 +          putchar ('\n');
   1.479 +          byte_idx = 0;
   1.480 +          print_delimiter = false;
   1.481 +          current_rp = rp;
   1.482 +        }
   1.483 +      else if (c == EOF)
   1.484 +        {
   1.485 +          if (byte_idx > 0)
   1.486 +            putchar ('\n');
   1.487 +          break;
   1.488 +        }
   1.489 +      else
   1.490 +        {
   1.491 +          next_item (&byte_idx);
   1.492 +          if (print_kth (byte_idx))
   1.493 +            {
   1.494 +              if (output_delimiter_specified)
   1.495 +                {
   1.496 +                  if (print_delimiter && is_range_start_index (byte_idx))
   1.497 +                    {
   1.498 +                      fwrite (output_delimiter_string, sizeof (char),
   1.499 +                              output_delimiter_length, stdout);
   1.500 +                    }
   1.501 +                  print_delimiter = true;
   1.502 +                }
   1.503 +
   1.504 +              putchar (c);
   1.505 +            }
   1.506 +        }
   1.507 +    }
   1.508 +}
   1.509 +
/* Read from stream STREAM, printing to standard output any selected fields.

   Fields are separated by the global DELIM and lines end at '\n'.
   Selected fields of one line are joined with OUTPUT_DELIMITER_STRING.
   A line without any delimiter is printed unchanged unless
   SUPPRESS_NON_DELIMITED is set, in which case it is dropped.
   An empty STREAM produces no output at all.  */

static void
cut_fields (FILE *stream)
{
  int c;
  size_t field_idx = 1;
  bool found_any_selected_field = false;
  bool buffer_first_field;

  current_rp = rp;

  /* Peek at the first byte so that empty input returns immediately.  */
  c = getc (stream);
  if (c == EOF)
    return;

  ungetc (c, stream);
  c = 0;

  /* To support the semantics of the -s flag, we may have to buffer
     all of the first field to determine whether it is 'delimited.'
     But that is unnecessary if all non-delimited lines must be printed
     and the first field has been selected, or if non-delimited lines
     must be suppressed and the first field has *not* been selected.
     That is because a non-delimited line has exactly one field.  */
  buffer_first_field = (suppress_non_delimited ^ !print_kth (1));

  while (1)
    {
      if (field_idx == 1 && buffer_first_field)
        {
          ssize_t len;
          size_t n_bytes;

          /* Read the first field into FIELD_1_BUFFER.  The code below
             treats the last byte stored as the terminator that was seen
             (DELIM or '\n'), if any.  */
          len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
                            GETNLINE_NO_LIMIT, delim, '\n', stream);
          if (len < 0)
            {
              free (field_1_buffer);
              field_1_buffer = NULL;
              /* A read error or end of input ends processing of this
                 stream; any other failure is treated as out of memory.  */
              if (ferror (stream) || feof (stream))
                break;
              xalloc_die ();
            }

          n_bytes = len;
          assert (n_bytes != 0);

          c = 0;

          /* If the first field extends to the end of line (it is not
             delimited) and we are printing all non-delimited lines,
             print this one.  */
          if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
            {
              if (suppress_non_delimited)
                {
                  /* Empty.  */
                }
              else
                {
                  fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
                  /* Make sure the output line is newline terminated.  */
                  if (field_1_buffer[n_bytes - 1] != '\n')
                    putchar ('\n');
                  c = '\n';
                }
              /* FIELD_IDX is still 1, so the next iteration reads the
                 first field of the following line.  */
              continue;
            }
          if (print_kth (1))
            {
              /* Print the field, but not the trailing delimiter.  */
              fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);

              /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
              if (delim == '\n')
                {
                  int last_c = getc (stream);
                  if (last_c != EOF)
                    {
                      ungetc (last_c, stream);
                      found_any_selected_field = true;
                    }
                }
              else
                found_any_selected_field = true;
            }
          next_item (&field_idx);
        }

      /* PREV_C tracks the last byte consumed for the current field; it is
         consulted below when deciding whether a final newline must still
         be written.  */
      int prev_c = c;

      if (print_kth (field_idx))
        {
          /* Separate this field from the previously printed one.  */
          if (found_any_selected_field)
            {
              fwrite (output_delimiter_string, sizeof (char),
                      output_delimiter_length, stdout);
            }
          found_any_selected_field = true;

          /* Copy the field up to, but not including, its terminator.  */
          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
            {
              putchar (c);
              prev_c = c;
            }
        }
      else
        {
          /* Skip an unselected field.  */
          while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
            {
              prev_c = c;
            }
        }

      /* With -d$'\n' don't treat the last '\n' as a delimiter.  */
      if (delim == '\n' && c == delim)
        {
          int last_c = getc (stream);
          if (last_c != EOF)
            ungetc (last_c, stream);
          else
            c = last_c;
        }

      if (c == delim)
        next_item (&field_idx);
      else if (c == '\n' || c == EOF)
        {
          /* End of line or of input: finish the output line unless this
             was a non-delimited line that -s suppresses.  */
          if (found_any_selected_field
              || !(suppress_non_delimited && field_idx == 1))
            {
              if (c == '\n' || prev_c != '\n' || delim == '\n')
                putchar ('\n');
            }
          if (c == EOF)
            break;
          /* Reset the per-line state for the next line.  */
          field_idx = 1;
          current_rp = rp;
          found_any_selected_field = false;
        }
    }
}
   1.653 +
   1.654 +static void
   1.655 +cut_stream (FILE *stream)
   1.656 +{
   1.657 +  if (operating_mode == byte_mode)
   1.658 +    cut_bytes (stream);
   1.659 +  else
   1.660 +    cut_fields (stream);
   1.661 +}
   1.662 +
   1.663 +/* Process file FILE to standard output.
   1.664 +   Return true if successful.  */
   1.665 +
   1.666 +static bool
   1.667 +cut_file (char const *file)
   1.668 +{
   1.669 +  FILE *stream;
   1.670 +
   1.671 +  if (STREQ (file, "-"))
   1.672 +    {
   1.673 +      have_read_stdin = true;
   1.674 +      stream = stdin;
   1.675 +    }
   1.676 +  else
   1.677 +    {
   1.678 +      stream = fopen (file, "r");
   1.679 +      if (stream == NULL)
   1.680 +        {
   1.681 +          error (0, errno, "%s", file);
   1.682 +          return false;
   1.683 +        }
   1.684 +    }
   1.685 +
   1.686 +  fadvise (stream, FADVISE_SEQUENTIAL);
   1.687 +
   1.688 +  cut_stream (stream);
   1.689 +
   1.690 +  if (ferror (stream))
   1.691 +    {
   1.692 +      error (0, errno, "%s", file);
   1.693 +      return false;
   1.694 +    }
   1.695 +  if (STREQ (file, "-"))
   1.696 +    clearerr (stream);		/* Also clear EOF. */
   1.697 +  else if (fclose (stream) == EOF)
   1.698 +    {
   1.699 +      error (0, errno, "%s", file);
   1.700 +      return false;
   1.701 +    }
   1.702 +  return true;
   1.703 +}
   1.704 +
/* Entry point.  Parse the command line, build the range list from the
   -b/-c/-f argument, then process every FILE operand in turn (standard
   input when there is none).  Return EXIT_SUCCESS if every file was
   processed successfully and EXIT_FAILURE otherwise.  Invalid option
   combinations are reported through FATAL_ERROR, which does not return.  */
int
main (int argc, char **argv)
{
  int optc;
  bool ok;
  bool delim_specified = false;
  char *spec_list_string IF_LINT ( = NULL);

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  operating_mode = undefined_mode;

  /* By default, all non-delimited lines are printed.  */
  suppress_non_delimited = false;

  delim = '\0';
  have_read_stdin = false;

  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
    {
      switch (optc)
        {
        case 'b':
        case 'c':
          /* Build the byte list.  -b and -c both select byte_mode.  */
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = byte_mode;
          spec_list_string = optarg;
          break;

        case 'f':
          /* Build the field list. */
          if (operating_mode != undefined_mode)
            FATAL_ERROR (_("only one type of list may be specified"));
          operating_mode = field_mode;
          spec_list_string = optarg;
          break;

        case 'd':
          /* New delimiter. */
          /* Interpret -d '' to mean 'use the NUL byte as the delimiter.'  */
          if (optarg[0] != '\0' && optarg[1] != '\0')
            FATAL_ERROR (_("the delimiter must be a single character"));
          delim = optarg[0];
          delim_specified = true;
          break;

        case OUTPUT_DELIMITER_OPTION:
          output_delimiter_specified = true;
          /* Interpret --output-delimiter='' to mean
             'use the NUL byte as the delimiter.'  */
          output_delimiter_length = (optarg[0] == '\0'
                                     ? 1 : strlen (optarg));
          output_delimiter_string = xstrdup (optarg);
          break;

        case 'n':
          /* Accepted, but has no effect.  */
          break;

        case 's':
          suppress_non_delimited = true;
          break;

        case COMPLEMENT_OPTION:
          complement = true;
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (EXIT_FAILURE);
        }
    }

  /* Exactly one of -b, -c or -f is required.  */
  if (operating_mode == undefined_mode)
    FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));

  /* -d and -s are meaningful for field mode only.  */
  if (delim_specified && operating_mode != field_mode)
    FATAL_ERROR (_("an input delimiter may be specified only\
 when operating on fields"));

  if (suppress_non_delimited && operating_mode != field_mode)
    FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
\tonly when operating on fields"));

  /* The list is parsed only now, after the whole command line has been
     read, so --complement takes effect wherever it appeared.  */
  if (! set_fields (spec_list_string))
    {
      if (operating_mode == field_mode)
        FATAL_ERROR (_("missing list of fields"));
      else
        FATAL_ERROR (_("missing list of positions"));
    }

  /* TAB is the default input delimiter.  */
  if (!delim_specified)
    delim = '\t';

  /* Without --output-delimiter, fields are joined with the input
     delimiter itself.  */
  if (output_delimiter_string == NULL)
    {
      static char dummy[2];
      dummy[0] = delim;
      dummy[1] = '\0';
      output_delimiter_string = dummy;
      output_delimiter_length = 1;
    }

  /* With no FILE operand read standard input; otherwise process every
     operand, continuing with the remaining ones after a failure.  */
  if (optind == argc)
    ok = cut_file ("-");
  else
    for (ok = true; optind < argc; optind++)
      ok &= cut_file (argv[optind]);


  /* cut_file leaves stdin open; close it here and report a failure.  */
  if (have_read_stdin && fclose (stdin) == EOF)
    {
      error (0, errno, "-");
      ok = false;
    }

  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}