docs/cut

diff code/cut.c__gnu.1992-11-08 @ 14:21ad1c1548c4

Code ausgewaehlter Implementierungen eingefuegt. Das Datum entspricht dem Dateiaenderungsdatum.
author markus schnalke <meillo@marmaro.de>
date Tue, 12 May 2015 06:46:59 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/code/cut.c__gnu.1992-11-08	Tue May 12 06:46:59 2015 +0200
     1.3 @@ -0,0 +1,586 @@
     1.4 +/* cut - remove parts of lines of files
     1.5 +   Copyright (C) 1984 by David M. Ihnat
     1.6 + 
     1.7 +   This program is a total rewrite of the Bell Laboratories Unix(Tm)
     1.8 +   command of the same name, as of System V.  It contains no proprietary
     1.9 +   code, and therefore may be used without violation of any proprietary
    1.10 +   agreements whatsoever.  However, you will notice that the program is
    1.11 +   copyrighted by me.  This is to assure the program does *not* fall
    1.12 +   into the public domain.  Thus, I may specify just what I am now:
    1.13 +   This program may be freely copied and distributed, provided this notice
    1.14 +   remains; it may not be sold for profit without express written consent of
    1.15 +   the author.
    1.16 +   Please note that I recreated the behavior of the Unix(Tm) 'cut' command
    1.17 +   as faithfully as possible; however, I haven't run a full set of regression
    1.18 +   tests.  Thus, the user of this program accepts full responsibility for any
    1.19 +   effects or loss; in particular, the author is not responsible for any losses,
    1.20 +   explicit or incidental, that may be incurred through use of this program.
    1.21 +
    1.22 +   I ask that any bugs (and, if possible, fixes) be reported to me when
    1.23 +   possible.  -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
    1.24 +
    1.25 +   POSIX changes, bug fixes, long-named options, and cleanup
    1.26 +   by David MacKenzie <djm@ai.mit.edu>.
    1.27 +
    1.28 +   Options:
    1.29 +   --bytes=byte-list
    1.30 +   -b byte-list			Print only the bytes in positions listed
    1.31 +				in BYTE-LIST.
    1.32 +				Tabs and backspaces are treated like any
    1.33 +				other character; they take up 1 byte.
    1.34 +
    1.35 +   --characters=character-list
    1.36 +   -c character-list		Print only characters in positions listed
    1.37 +				in CHARACTER-LIST.
    1.38 +				The same as -b for now, but
    1.39 +				internationalization will change that.
    1.40 +				Tabs and backspaces are treated like any
    1.41 +				other character; they take up 1 character.
    1.42 +
    1.43 +   --fields=field-list
    1.44 +   -f field-list		Print only the fields listed in FIELD-LIST.
    1.45 +				Fields are separated by a TAB by default.
    1.46 +
    1.47 +   --delimiter=delim
    1.48 +   -d delim			For -f, fields are separated by the first
    1.49 +				character in DELIM instead of TAB.
    1.50 +
    1.51 +   -n				Do not split multibyte chars (no-op for now).
    1.52 +
    1.53 +   --only-delimited
    1.54 +   -s				For -f, do not print lines that do not contain
    1.55 +				the field separator character.
    1.56 +
    1.57 +   The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
    1.58 +   or ranges separated by commas.  The first byte, character, and field
    1.59 +   are numbered 1.
    1.60 +
    1.61 +   A FILE of `-' means standard input. */
    1.62 +
    1.63 +#define _GNU_SOURCE
    1.64 +#include <ctype.h>
    1.65 +#ifndef isblank
    1.66 +#define isblank(c) ((c) == ' ' || (c) == '\t')
    1.67 +#endif
    1.68 +#include <stdio.h>
    1.69 +#include <getopt.h>
    1.70 +#include <sys/types.h>
    1.71 +#include "system.h"
    1.72 +
    1.73 +#ifdef isascii
    1.74 +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
    1.75 +#else
    1.76 +#define ISDIGIT(c) (isdigit ((c)))
    1.77 +#endif
    1.78 +
    1.79 +char *xmalloc ();	/* Not defined in this file. */
    1.80 +char *xrealloc ();	/* Not defined in this file. */
    1.81 +int set_fields ();	/* Old-style declarations: argument types are not checked. */
    1.82 +int cut_file ();
    1.83 +void cut_stream ();
    1.84 +void cut_bytes ();
    1.85 +void cut_fields ();
    1.86 +void enlarge_line ();
    1.87 +void error ();		/* Not defined in this file; called here as error (int, int, format, ...). */
    1.88 +void invalid_list ();
    1.89 +void usage ();
    1.90 +
    1.91 +/* The number of elements allocated for the input line
    1.92 +   and the byte or field number.
    1.93 +   Enlarged as necessary. */
    1.94 +int line_size;		/* Common element count of `fields', `outbuf' and `inbuf'. */
    1.95 +
    1.96 +/* Processed output buffer. */
    1.97 +char *outbuf;
    1.98 +
    1.99 +/* Where to save next char to output. */
   1.100 +char *outbufptr;	/* Points into `outbuf'; reset at the start of every line. */
   1.101 +
   1.102 +/* Raw line buffer for field mode. */
   1.103 +char *inbuf;
   1.104 +
   1.105 +/* Where to save next input char. */
   1.106 +char *inbufptr;	/* Points into `inbuf'; reset at the start of every line in field mode. */
   1.107 +
   1.108 +/* What can be done about a byte or field. */
   1.109 +enum field_action
   1.110 +{
   1.111 +  field_omit,		/* Do not output this byte or field. */
   1.112 +  field_output		/* Output this byte or field. */
   1.113 +};
   1.114 +
   1.115 +/* In byte mode, which bytes to output.
   1.116 +   In field mode, which `delim'-separated fields to output.
   1.117 +   Both bytes and fields are numbered starting with 1,
   1.118 +   so the first element of `fields' is unused. */
   1.119 +enum field_action *fields;	/* Holds `line_size' elements. */
   1.120 +
   1.121 +enum operating_mode
   1.122 +{
   1.123 +  undefined_mode,	/* No -b, -c or -f option has been seen yet. */
   1.124 +
   1.125 +  /* Output characters that are in the given bytes. */
   1.126 +  byte_mode,
   1.127 +
   1.128 +  /* Output the given delimiter-separated fields. */
   1.129 +  field_mode
   1.130 +};
   1.131 +
   1.132 +enum operating_mode operating_mode;	/* Chosen in main by the -b, -c or -f option. */
   1.133 +
   1.134 +/* If nonzero,
   1.135 +   for field mode, do not output lines containing no delimiter characters. */
   1.136 +int delimited_lines_only;
   1.137 +
   1.138 +/* The delimiter character for field mode. */
   1.139 +unsigned char delim;	/* '\0' while parsing means no -d was given; main then uses TAB. */
   1.140 +
   1.141 +/* Nonzero if we have ever read standard input. */
   1.142 +int have_read_stdin;
   1.143 +
   1.144 +/* The name this program was run with. */
   1.145 +char *program_name;
   1.146 +
   1.147 +struct option longopts[] =	/* Long-option table passed to getopt_long in main. */
   1.148 +{
   1.149 +  {"bytes", 1, 0, 'b'},		/* Takes an argument; handled as -b. */
   1.150 +  {"characters", 1, 0, 'c'},	/* Takes an argument; handled as -c. */
   1.151 +  {"fields", 1, 0, 'f'},	/* Takes an argument; handled as -f. */
   1.152 +  {"delimiter", 1, 0, 'd'},	/* Takes an argument; handled as -d. */
   1.153 +  {"only-delimited", 0, 0, 's'},	/* No argument; handled as -s. */
   1.154 +  {0, 0, 0, 0}			/* End-of-table marker. */
   1.155 +};
   1.156 +
   1.157 +int
   1.158 +main (argc, argv)		/* Parse options, then cut each FILE operand (or stdin). */
   1.159 +     int argc;
   1.160 +     char **argv;
   1.161 +{
   1.162 +  int optc, exit_status = 0;	/* optc also serves as the index of the init loop below. */
   1.163 +
   1.164 +  program_name = argv[0];
   1.165 +
   1.166 +  line_size = 512;		/* Initial element count of fields, outbuf and inbuf. */
   1.167 +  operating_mode = undefined_mode;
   1.168 +  delimited_lines_only = 0;
   1.169 +  delim = '\0';			/* Marks "no -d given" until the default is applied. */
   1.170 +  have_read_stdin = 0;
   1.171 +
   1.172 +  fields = (enum field_action *)
   1.173 +    xmalloc (line_size * sizeof (enum field_action));
   1.174 +  outbuf = (char *) xmalloc (line_size);
   1.175 +  inbuf = (char *) xmalloc (line_size);
   1.176 +
   1.177 +  for (optc = 0; optc < line_size; optc++)
   1.178 +    fields[optc] = field_omit;	/* Nothing is selected until a list is parsed. */
   1.179 +
   1.180 +  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
   1.181 +	 != EOF)
   1.182 +    {
   1.183 +      switch (optc)
   1.184 +	{
   1.185 +	case 'b':
   1.186 +	case 'c':
   1.187 +	  /* Build the byte list. */
   1.188 +	  if (operating_mode != undefined_mode)
   1.189 +	    usage ();		/* A second -b, -c or -f is rejected; usage exits. */
   1.190 +	  operating_mode = byte_mode;
   1.191 +	  if (set_fields (optarg) == 0)
   1.192 +	    error (2, 0, "no fields given");
   1.193 +	  break;
   1.194 +
   1.195 +	case 'f':
   1.196 +	  /* Build the field list. */
   1.197 +	  if (operating_mode != undefined_mode)
   1.198 +	    usage ();		/* A second -b, -c or -f is rejected; usage exits. */
   1.199 +	  operating_mode = field_mode;
   1.200 +	  if (set_fields (optarg) == 0)
   1.201 +	    error (2, 0, "no fields given");
   1.202 +	  break;
   1.203 +
   1.204 +	case 'd':
   1.205 +	  /* New delimiter. */
   1.206 +	  if (optarg[0] == '\0')
   1.207 +	    error (2, 0, "no delimiter given");
   1.208 +	  if (optarg[1] != '\0')
   1.209 +	    error (2, 0, "delimiter must be a single character");
   1.210 +	  delim = optarg[0];
   1.211 +	  break;
   1.212 +
   1.213 +	case 'n':
   1.214 +	  break;		/* Accepted and ignored. */
   1.215 +
   1.216 +	case 's':
   1.217 +	  delimited_lines_only++;
   1.218 +	  break;
   1.219 +
   1.220 +	default:
   1.221 +	  usage ();
   1.222 +	}
   1.223 +    }
   1.224 +
   1.225 +  if (operating_mode == undefined_mode)
   1.226 +    usage ();			/* One of -b, -c and -f is mandatory. */
   1.227 +
   1.228 +  if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode)
   1.229 +    usage ();			/* -d and -s are only accepted together with -f. */
   1.230 +
   1.231 +  if (delim == '\0')
   1.232 +    delim = '\t';		/* Default field delimiter. */
   1.233 +
   1.234 +  if (optind == argc)		/* No FILE operands: read standard input. */
   1.235 +    exit_status |= cut_file ("-");
   1.236 +  else
   1.237 +    for (; optind < argc; optind++)
   1.238 +      exit_status |= cut_file (argv[optind]);	/* Any failing file makes the status 1. */
   1.239 +
   1.240 +  if (have_read_stdin && fclose (stdin) == EOF)
   1.241 +    {
   1.242 +      error (0, errno, "-");
   1.243 +      exit_status = 1;
   1.244 +    }
   1.245 +  if (ferror (stdout) || fclose (stdout) == EOF)
   1.246 +    error (1, 0, "write error");	/* Presumably exits with status 1 -- confirm in error's definition. */
   1.247 +
   1.248 +  exit (exit_status);		/* exit does not return, so no return statement follows. */
   1.249 +}
   1.250 +
   1.251 +/* Select for printing the positions in `fields' that are listed in
   1.252 +   byte or field specification FIELDSTR.  FIELDSTR should be
   1.253 +   composed of one or more numbers or ranges of numbers, separated by
   1.254 +   blanks or commas.  Incomplete ranges may be given: `-m' means
   1.255 +   `1-m'; `n-' means `n' through end of line or last field.
   1.256 +
   1.257 +   Return the number of fields selected. */
   1.258 +
   1.259 +int
   1.260 +set_fields (fieldstr)
   1.261 +     char *fieldstr;		/* The list text given to -b, -c or -f. */
   1.262 +{
   1.263 +  int initial = 1;		/* Value of first number in a range. */
   1.264 +  int dash_found = 0;		/* Nonzero if a '-' is found in this field. */
   1.265 +  int value = 0;		/* If nonzero, a number being accumulated. */
   1.266 +  int fields_selected = 0;	/* Number of list entries (not positions) selected so far. */
   1.267 +  /* If nonzero, index of first field in a range that goes to end of line. */
   1.268 +  int eol_range_start = 0;
   1.269 +
   1.270 +  for (;;)			/* One character of FIELDSTR per iteration; returns at '\0'. */
   1.271 +    {
   1.272 +      if (*fieldstr == '-')
   1.273 +	{
   1.274 +	  /* Starting a range. */
   1.275 +	  if (dash_found)
   1.276 +	    invalid_list ();	/* Second '-' in one entry; presumably does not return -- confirm. */
   1.277 +	  dash_found++;
   1.278 +	  fieldstr++;
   1.279 +
   1.280 +	  if (value)
   1.281 +	    {
   1.282 +	      if (value >= line_size)
   1.283 +		enlarge_line (value);
   1.284 +	      initial = value;
   1.285 +	      value = 0;
   1.286 +	    }
   1.287 +	  else
   1.288 +	    initial = 1;	/* No number, or the number 0, before '-': start at 1. */
   1.289 +	}
   1.290 +      else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
   1.291 +	{
   1.292 +	  /* Ending the string, or this field/byte sublist. */
   1.293 +	  if (dash_found)
   1.294 +	    {
   1.295 +	      dash_found = 0;
   1.296 +
   1.297 +	      /* A range.  Possibilities: -n, m-n, n-.
   1.298 +		 In any case, `initial' contains the start of the range. */
   1.299 +	      if (value == 0)
   1.300 +		{
   1.301 +		  /* `n-'.  From `initial' to end of line. */
   1.302 +		  eol_range_start = initial;	/* NOTE(review): replaces an earlier, smaller start (`3-,5-' keeps only 5-). */
   1.303 +		  fields_selected++;
   1.304 +		}
   1.305 +	      else
   1.306 +		{
   1.307 +		  /* `m-n' or `-n' (1-n). */
   1.308 +		  if (value < initial)
   1.309 +		    invalid_list ();
   1.310 +
   1.311 +		  if (value >= line_size)
   1.312 +		    enlarge_line (value);
   1.313 +
   1.314 +		  /* Is there already a range going to end of line? */
   1.315 +		  if (eol_range_start != 0)
   1.316 +		    {
   1.317 +		      /* Yes.  Is the new sequence already contained
   1.318 +			 in the old one?  If so, no processing is
   1.319 +			 necessary. */
   1.320 +		      if (initial < eol_range_start)
   1.321 +			{
   1.322 +			  /* No, the new sequence starts before the
   1.323 +			     old.  Does the old range going to end of line
   1.324 +			     extend into the new range?  */
   1.325 +			  if (eol_range_start < value)
   1.326 +			    /* Yes.  Simply move the end of line marker. */
   1.327 +			    eol_range_start = initial;
   1.328 +			  else
   1.329 +			    {
   1.330 +			      /* No.  A simple range, before and disjoint from
   1.331 +				 the range going to end of line.  Fill it. */
   1.332 +			      for (; initial <= value; initial++)
   1.333 +				fields[initial] = field_output;
   1.334 +			    }
   1.335 +
   1.336 +			  /* In any case, some fields were selected. */
   1.337 +			  fields_selected++;
   1.338 +			}
   1.339 +		    }
   1.340 +		  else
   1.341 +		    {
   1.342 +		      /* There is no range going to end of line. */
   1.343 +		      for (; initial <= value; initial++)
   1.344 +			fields[initial] = field_output;
   1.345 +		      fields_selected++;
   1.346 +		    }
   1.347 +		  value = 0;
   1.348 +		}
   1.349 +	    }
   1.350 +	  else if (value != 0)
   1.351 +	    {
   1.352 +	      /* A simple field number, not a range. */
   1.353 +	      if (value >= line_size)
   1.354 +		enlarge_line (value);
   1.355 +
   1.356 +	      fields[value] = field_output;
   1.357 +	      value = 0;
   1.358 +	      fields_selected++;
   1.359 +	    }
   1.360 +
   1.361 +	  if (*fieldstr == '\0')
   1.362 +	    {
   1.363 +	      /* If there was a range going to end of line, fill the
   1.364 +		 array from the end of line point.  */
   1.365 +	      if (eol_range_start)
   1.366 +		for (initial = eol_range_start; initial < line_size; initial++)	/* NOTE(review): stops at the current line_size. */
   1.367 +		  fields[initial] = field_output;
   1.368 +
   1.369 +	      return fields_selected;
   1.370 +	    }
   1.371 +
   1.372 +	  fieldstr++;
   1.373 +	}
   1.374 +      else if (ISDIGIT (*fieldstr))
   1.375 +	{
   1.376 +	  value = 10 * value + *fieldstr - '0';	/* NOTE(review): no overflow check on very long numbers. */
   1.377 +	  fieldstr++;
   1.378 +	}
   1.379 +      else
   1.380 +	invalid_list ();	/* Any other character makes the list invalid. */
   1.381 +    }
   1.382 +}
   1.383 +
   1.384 +/* Process file FILE to standard output.
   1.385 +   Return 0 if successful, 1 if not. */
   1.386 +
   1.387 +int
   1.388 +cut_file (file)
   1.389 +     char *file;		/* File name; "-" selects standard input. */
   1.390 +{
   1.391 +  FILE *stream;
   1.392 +
   1.393 +  if (!strcmp (file, "-"))
   1.394 +    {
   1.395 +      have_read_stdin = 1;	/* main closes stdin itself after all operands. */
   1.396 +      stream = stdin;
   1.397 +    }
   1.398 +  else
   1.399 +    {
   1.400 +      stream = fopen (file, "r");
   1.401 +      if (stream == NULL)
   1.402 +	{
   1.403 +	  error (0, errno, "%s", file);
   1.404 +	  return 1;
   1.405 +	}
   1.406 +    }
   1.407 +
   1.408 +  cut_stream (stream);		/* Does the actual cutting in the selected mode. */
   1.409 +
   1.410 +  if (ferror (stream))
   1.411 +    {
   1.412 +      error (0, errno, "%s", file);
   1.413 +      return 1;		/* NOTE(review): a stream opened by fopen above is not closed on this path. */
   1.414 +    }
   1.415 +  if (!strcmp (file, "-"))
   1.416 +    clearerr (stream);		/* Also clear EOF. */
   1.417 +  else if (fclose (stream) == EOF)
   1.418 +    {
   1.419 +      error (0, errno, "%s", file);
   1.420 +      return 1;
   1.421 +    }
   1.422 +  return 0;
   1.423 +}
   1.424 +
   1.425 +void
   1.426 +cut_stream (stream)		/* Dispatch STREAM to the cutter for the current operating_mode. */
   1.427 +     FILE *stream;
   1.428 +{
   1.429 +  if (operating_mode == byte_mode)
   1.430 +    cut_bytes (stream);
   1.431 +  else				/* Any mode other than byte_mode is handled as field mode. */
   1.432 +    cut_fields (stream);
   1.433 +}
   1.434 +
   1.435 +/* Print the file open for reading on stream STREAM
   1.436 +   with the bytes marked `field_omit' in `fields' removed from each line. */
   1.437 +
   1.438 +void
   1.439 +cut_bytes (stream)
   1.440 +     FILE *stream;
   1.441 +{
   1.442 +  register int c;		/* Each character from the file. */
   1.443 +  int doneflag = 0;		/* Nonzero if EOF reached. */
   1.444 +  int char_count;		/* Number of chars in the line so far. */
   1.445 +
   1.446 +  while (doneflag == 0)		/* One input line per iteration. */
   1.447 +    {
   1.448 +      /* Start processing a line. */
   1.449 +      outbufptr = outbuf;
   1.450 +      char_count = 0;
   1.451 +
   1.452 +      do
   1.453 +	{
   1.454 +	  c = getc (stream);
   1.455 +	  if (c == EOF)
   1.456 +	    {
   1.457 +	      doneflag++;
   1.458 +	      break;		/* A final line without newline is still written below. */
   1.459 +	    }
   1.460 +
   1.461 +	  /* If this character is to be sent, stow it in the outbuffer. */
   1.462 +
   1.463 +	  if (++char_count == line_size - 1)
   1.464 +	    enlarge_line (char_count);	/* NOTE(review): added positions are field_omit, even after an `n-' list. */
   1.465 +
   1.466 +	  if (fields[char_count] == field_output || c == '\n')	/* The newline is always kept. */
   1.467 +	    *outbufptr++ = c;
   1.468 +	}
   1.469 +      while (c != '\n');
   1.470 +
   1.471 +      if (char_count)		/* Nothing is written when EOF came before any character. */
   1.472 +	fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
   1.473 +    }
   1.474 +}
   1.475 +
   1.476 +/* Print the file open for reading on stream STREAM
   1.477 +   with the fields marked `field_omit' in `fields' removed from each line.
   1.478 +   All characters are initially stowed in the raw input buffer, until
   1.479 +   at least one field has been found. */
   1.480 +
   1.481 +void
   1.482 +cut_fields (stream)
   1.483 +     FILE *stream;
   1.484 +{
   1.485 +  register int c;		/* Each character from the file. */
   1.486 +  int doneflag = 0;		/* Nonzero if EOF reached. */
   1.487 +  int char_count;		/* Number of chars saved in `inbuf' for this line. */
   1.488 +  int fieldfound;		/* Nonzero if any fields to print found. */
   1.489 +  int curr_field;		/* Current index in `fields'. */
   1.490 +
   1.491 +  while (doneflag == 0)		/* One input line per iteration. */
   1.492 +    {
   1.493 +      char_count = 0;
   1.494 +      fieldfound = 0;
   1.495 +      curr_field = 1;
   1.496 +      outbufptr = outbuf;
   1.497 +      inbufptr = inbuf;
   1.498 +
   1.499 +      do
   1.500 +	{
   1.501 +	  c = getc (stream);
   1.502 +	  if (c == EOF)
   1.503 +	    {
   1.504 +	      doneflag++;
   1.505 +	      break;
   1.506 +	    }
   1.507 +
   1.508 +	  if (fields[curr_field] == field_output && c != '\n')
   1.509 +	    {
   1.510 +	      /* Working on a field.  It, and its terminating
   1.511 +		 delimiter, go only into the processed buffer. */
   1.512 +	      fieldfound = 1;	/* NOTE(review): set even if the line never contains a delimiter. */
   1.513 +	      if (outbufptr - outbuf == line_size - 2)
   1.514 +		enlarge_line (outbufptr - outbuf);
   1.515 +	      *outbufptr++ = c;
   1.516 +	    }
   1.517 +	  else if (fieldfound == 0)
   1.518 +	    {
   1.519 +	      if (++char_count == line_size - 1)	/* Raw copy, including any newline. */
   1.520 +		enlarge_line (char_count);
   1.521 +	      *inbufptr++ = c;
   1.522 +	    }
   1.523 +
   1.524 +	  if (c == delim && ++curr_field == line_size - 1)	/* Each delimiter starts the next field. */
   1.525 +	    enlarge_line (curr_field);
   1.526 +	}
   1.527 +      while (c != '\n');
   1.528 +
   1.529 +      if (fieldfound)		/* NOTE(review): delimited_lines_only is not consulted on this path. */
   1.530 +	{
   1.531 +	  /* Something was found. Print it. */
   1.532 +	  if (outbufptr[-1] == delim)
   1.533 +	    --outbufptr;	/* Suppress trailing delimiter (also when the last selected field is empty). */
   1.534 +
   1.535 +	  fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
   1.536 +	  if (c == '\n')	/* The newline was not stored in outbuf; add it back. */
   1.537 +	    putc (c, stdout);
   1.538 +	}
   1.539 +      else if (!delimited_lines_only && char_count)
   1.540 +	/* A line with some characters, no delimiters, and no
   1.541 +	   suppression.  Print it. */
   1.542 +	fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
   1.543 +    }
   1.544 +}
   1.545 +
   1.546 +/* Extend the buffers to accommodate at least NEW_SIZE characters. */
   1.547 +
   1.548 +void
   1.549 +enlarge_line (new_size)
   1.550 +     int new_size;
   1.551 +{
   1.552 +  char *newp;
   1.553 +  int i;
   1.554 +
   1.555 +  new_size += 256;		/* Leave some room to grow. */
   1.556 +
   1.557 +  fields = (enum field_action *)
   1.558 +    xrealloc (fields, new_size * sizeof (enum field_action));
   1.559 +
   1.560 +  newp = (char *) xrealloc (outbuf, new_size);
   1.561 +  outbufptr += newp - outbuf;	/* Rebase the cursor; NOTE(review): subtracts the pre-realloc pointer. */
   1.562 +  outbuf = newp;
   1.563 +
   1.564 +  newp = (char *) xrealloc (inbuf, new_size);
   1.565 +  inbufptr += newp - inbuf;	/* Rebase the cursor; NOTE(review): subtracts the pre-realloc pointer. */
   1.566 +  inbuf = newp;
   1.567 +
   1.568 +  for (i = line_size; i < new_size; i++)
   1.569 +    fields[i] = field_omit;	/* Added positions start unselected, even after an `n-' list. */
   1.570 +  line_size = new_size;
   1.571 +}
   1.572 +
   1.573 +void
   1.574 +invalid_list ()			/* Report a malformed byte or field list. */
   1.575 +{
   1.576 +  error (2, 0, "invalid byte or field list");	/* Presumably exits with status 2 -- confirm in error's definition. */
   1.577 +}
   1.578 +
   1.579 +void
   1.580 +usage ()			/* Print the usage synopsis on stderr and exit with status 2. */
   1.581 +{
   1.582 +  fprintf (stderr, "\
   1.583 +Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\
   1.584 +       %s {-c character-list,--characters=character-list} [file...]\n\
   1.585 +       %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\
   1.586 +       [--delimiter=delim] [--only-delimited] [file...]\n",
   1.587 +	   program_name, program_name, program_name);	/* One name per usage form. */
   1.588 +  exit (2);
   1.589 +}