Mercurial > docs > cut
diff code/cut.c__gnu.1992-11-08 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__gnu.1992-11-08 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,586 @@ +/* cut - remove parts of lines of files + Copyright (C) 1984 by David M. Ihnat + + This program is a total rewrite of the Bell Laboratories Unix(Tm) + command of the same name, as of System V. It contains no proprietary + code, and therefore may be used without violation of any proprietary + agreements whatsoever. However, you will notice that the program is + copyrighted by me. This is to assure the program does *not* fall + into the public domain. Thus, I may specify just what I am now: + This program may be freely copied and distributed, provided this notice + remains; it may not be sold for profit without express written consent of + the author. + Please note that I recreated the behavior of the Unix(Tm) 'cut' command + as faithfully as possible; however, I haven't run a full set of regression + tests. Thus, the user of this program accepts full responsibility for any + effects or loss; in particular, the author is not responsible for any losses, + explicit or incidental, that may be incurred through use of this program. + + I ask that any bugs (and, if possible, fixes) be reported to me when + possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us + + POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie <djm@ai.mit.edu>. + + Options: + --bytes=byte-list + -b byte-list Print only the bytes in positions listed + in BYTE-LIST. + Tabs and backspaces are treated like any + other character; they take up 1 byte. + + --characters=character-list + -c character-list Print only characters in positions listed + in CHARACTER-LIST. + The same as -b for now, but + internationalization will change that. + Tabs and backspaces are treated like any + other character; they take up 1 character. + + --fields=field-list + -f field-list Print only the fields listed in FIELD-LIST. + Fields are separated by a TAB by default. + + --delimiter=delim + -d delim For -f, fields are separated by the first + character in DELIM instead of TAB. + + -n Do not split multibyte chars (no-op for now). + + --only-delimited + -s For -f, do not print lines that do not contain + the field separator character. + + The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers + or ranges separated by commas. The first byte, character, and field + are numbered 1. + + A FILE of `-' means standard input. */ + +#define _GNU_SOURCE +#include <ctype.h> +#ifndef isblank +#define isblank(c) ((c) == ' ' || (c) == '\t') +#endif +#include <stdio.h> +#include <getopt.h> +#include <sys/types.h> +#include "system.h" + +#ifdef isascii +#define ISDIGIT(c) (isascii ((c)) && isdigit ((c))) +#else +#define ISDIGIT(c) (isdigit ((c))) +#endif + +char *xmalloc (); +char *xrealloc (); +int set_fields (); +int cut_file (); +void cut_stream (); +void cut_bytes (); +void cut_fields (); +void enlarge_line (); +void error (); +void invalid_list (); +void usage (); + +/* The number of elements allocated for the input line + and the byte or field number. + Enlarged as necessary. */ +int line_size; + +/* Processed output buffer. */ +char *outbuf; + +/* Where to save next char to output. */ +char *outbufptr; + +/* Raw line buffer for field mode. */ +char *inbuf; + +/* Where to save next input char. */ +char *inbufptr; + +/* What can be done about a byte or field. */ +enum field_action +{ + field_omit, + field_output +}; + +/* In byte mode, which bytes to output. + In field mode, which `delim'-separated fields to output. + Both bytes and fields are numbered starting with 1, + so the first element of `fields' is unused. */ +enum field_action *fields; + +enum operating_mode +{ + undefined_mode, + + /* Output characters that are in the given bytes. */ + byte_mode, + + /* Output the given delimeter-separated fields. */ + field_mode +}; + +enum operating_mode operating_mode; + +/* If nonzero, + for field mode, do not output lines containing no delimeter characters. */ +int delimited_lines_only; + +/* The delimeter character for field mode. */ +unsigned char delim; + +/* Nonzero if we have ever read standard input. */ +int have_read_stdin; + +/* The name this program was run with. */ +char *program_name; + +struct option longopts[] = +{ + {"bytes", 1, 0, 'b'}, + {"characters", 1, 0, 'c'}, + {"fields", 1, 0, 'f'}, + {"delimiter", 1, 0, 'd'}, + {"only-delimited", 0, 0, 's'}, + {0, 0, 0, 0} +}; + +void +main (argc, argv) + int argc; + char **argv; +{ + int optc, exit_status = 0; + + program_name = argv[0]; + + line_size = 512; + operating_mode = undefined_mode; + delimited_lines_only = 0; + delim = '\0'; + have_read_stdin = 0; + + fields = (enum field_action *) + xmalloc (line_size * sizeof (enum field_action)); + outbuf = (char *) xmalloc (line_size); + inbuf = (char *) xmalloc (line_size); + + for (optc = 0; optc < line_size; optc++) + fields[optc] = field_omit; + + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0)) + != EOF) + { + switch (optc) + { + case 'b': + case 'c': + /* Build the byte list. */ + if (operating_mode != undefined_mode) + usage (); + operating_mode = byte_mode; + if (set_fields (optarg) == 0) + error (2, 0, "no fields given"); + break; + + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) + usage (); + operating_mode = field_mode; + if (set_fields (optarg) == 0) + error (2, 0, "no fields given"); + break; + + case 'd': + /* New delimiter. */ + if (optarg[0] == '\0') + error (2, 0, "no delimiter given"); + if (optarg[1] != '\0') + error (2, 0, "delimiter must be a single character"); + delim = optarg[0]; + break; + + case 'n': + break; + + case 's': + delimited_lines_only++; + break; + + default: + usage (); + } + } + + if (operating_mode == undefined_mode) + usage (); + + if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode) + usage (); + + if (delim == '\0') + delim = '\t'; + + if (optind == argc) + exit_status |= cut_file ("-"); + else + for (; optind < argc; optind++) + exit_status |= cut_file (argv[optind]); + + if (have_read_stdin && fclose (stdin) == EOF) + { + error (0, errno, "-"); + exit_status = 1; + } + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, 0, "write error"); + + exit (exit_status); +} + +/* Select for printing the positions in `fields' that are listed in + byte or field specification FIELDSTR. FIELDSTR should be + composed of one or more numbers or ranges of numbers, separated by + blanks or commas. Incomplete ranges may be given: `-m' means + `1-m'; `n-' means `n' through end of line or last field. + + Return the number of fields selected. */ + +int +set_fields (fieldstr) + char *fieldstr; +{ + int initial = 1; /* Value of first number in a range. */ + int dash_found = 0; /* Nonzero if a '-' is found in this field. */ + int value = 0; /* If nonzero, a number being accumulated. */ + int fields_selected = 0; /* Number of fields selected so far. */ + /* If nonzero, index of first field in a range that goes to end of line. */ + int eol_range_start = 0; + + for (;;) + { + if (*fieldstr == '-') + { + /* Starting a range. */ + if (dash_found) + invalid_list (); + dash_found++; + fieldstr++; + + if (value) + { + if (value >= line_size) + enlarge_line (value); + initial = value; + value = 0; + } + else + initial = 1; + } + else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0') + { + /* Ending the string, or this field/byte sublist. */ + if (dash_found) + { + dash_found = 0; + + /* A range. Possibilites: -n, m-n, n-. + In any case, `initial' contains the start of the range. */ + if (value == 0) + { + /* `n-'. From `initial' to end of line. */ + eol_range_start = initial; + fields_selected++; + } + else + { + /* `m-n' or `-n' (1-n). */ + if (value < initial) + invalid_list (); + + if (value >= line_size) + enlarge_line (value); + + /* Is there already a range going to end of line? */ + if (eol_range_start != 0) + { + /* Yes. Is the new sequence already contained + in the old one? If so, no processing is + necessary. */ + if (initial < eol_range_start) + { + /* No, the new sequence starts before the + old. Does the old range going to end of line + extend into the new range? */ + if (eol_range_start < value) + /* Yes. Simply move the end of line marker. */ + eol_range_start = initial; + else + { + /* No. A simple range, before and disjoint from + the range going to end of line. Fill it. */ + for (; initial <= value; initial++) + fields[initial] = field_output; + } + + /* In any case, some fields were selected. */ + fields_selected++; + } + } + else + { + /* There is no range going to end of line. */ + for (; initial <= value; initial++) + fields[initial] = field_output; + fields_selected++; + } + value = 0; + } + } + else if (value != 0) + { + /* A simple field number, not a range. */ + if (value >= line_size) + enlarge_line (value); + + fields[value] = field_output; + value = 0; + fields_selected++; + } + + if (*fieldstr == '\0') + { + /* If there was a range going to end of line, fill the + array from the end of line point. */ + if (eol_range_start) + for (initial = eol_range_start; initial < line_size; initial++) + fields[initial] = field_output; + + return fields_selected; + } + + fieldstr++; + } + else if (ISDIGIT (*fieldstr)) + { + value = 10 * value + *fieldstr - '0'; + fieldstr++; + } + else + invalid_list (); + } +} + +/* Process file FILE to standard output. + Return 0 if successful, 1 if not. */ + +int +cut_file (file) + char *file; +{ + FILE *stream; + + if (!strcmp (file, "-")) + { + have_read_stdin = 1; + stream = stdin; + } + else + { + stream = fopen (file, "r"); + if (stream == NULL) + { + error (0, errno, "%s", file); + return 1; + } + } + + cut_stream (stream); + + if (ferror (stream)) + { + error (0, errno, "%s", file); + return 1; + } + if (!strcmp (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + { + error (0, errno, "%s", file); + return 1; + } + return 0; +} + +void +cut_stream (stream) + FILE *stream; +{ + if (operating_mode == byte_mode) + cut_bytes (stream); + else + cut_fields (stream); +} + +/* Print the file open for reading on stream STREAM + with the bytes marked `field_omit' in `fields' removed from each line. */ + +void +cut_bytes (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. */ + int char_count; /* Number of chars in the line so far. */ + + while (doneflag == 0) + { + /* Start processing a line. */ + outbufptr = outbuf; + char_count = 0; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + /* If this character is to be sent, stow it in the outbuffer. */ + + if (++char_count == line_size - 1) + enlarge_line (char_count); + + if (fields[char_count] == field_output || c == '\n') + *outbufptr++ = c; + } + while (c != '\n'); + + if (char_count) + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + } +} + +/* Print the file open for reading on stream STREAM + with the fields marked `field_omit' in `fields' removed from each line. + All characters are initially stowed in the raw input buffer, until + at least one field has been found. */ + +void +cut_fields (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. */ + int char_count; /* Number of chars in line before any delim. */ + int fieldfound; /* Nonzero if any fields to print found. */ + int curr_field; /* Current index in `fields'. */ + + while (doneflag == 0) + { + char_count = 0; + fieldfound = 0; + curr_field = 1; + outbufptr = outbuf; + inbufptr = inbuf; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + if (fields[curr_field] == field_output && c != '\n') + { + /* Working on a field. It, and its terminating + delimiter, go only into the processed buffer. */ + fieldfound = 1; + if (outbufptr - outbuf == line_size - 2) + enlarge_line (outbufptr - outbuf); + *outbufptr++ = c; + } + else if (fieldfound == 0) + { + if (++char_count == line_size - 1) + enlarge_line (char_count); + *inbufptr++ = c; + } + + if (c == delim && ++curr_field == line_size - 1) + enlarge_line (curr_field); + } + while (c != '\n'); + + if (fieldfound) + { + /* Something was found. Print it. */ + if (outbufptr[-1] == delim) + --outbufptr; /* Suppress trailing delimiter. */ + + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + if (c == '\n') + putc (c, stdout); + } + else if (!delimited_lines_only && char_count) + /* A line with some characters, no delimiters, and no + suppression. Print it. */ + fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout); + } +} + +/* Extend the buffers to accomodate at least NEW_SIZE characters. */ + +void +enlarge_line (new_size) + int new_size; +{ + char *newp; + int i; + + new_size += 256; /* Leave some room to grow. */ + + fields = (enum field_action *) + xrealloc (fields, new_size * sizeof (enum field_action)); + + newp = (char *) xrealloc (outbuf, new_size); + outbufptr += newp - outbuf; + outbuf = newp; + + newp = (char *) xrealloc (inbuf, new_size); + inbufptr += newp - inbuf; + inbuf = newp; + + for (i = line_size; i < new_size; i++) + fields[i] = field_omit; + line_size = new_size; +} + +void +invalid_list () +{ + error (2, 0, "invalid byte or field list"); +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\ + %s {-c character-list,--characters=character-list} [file...]\n\ + %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\ + [--delimiter=delim] [--only-delimited] [file...]\n", + program_name, program_name, program_name); + exit (2); +}