# HG changeset patch # User markus schnalke # Date 1431406019 -7200 # Node ID 21ad1c1548c45ee743c1245000fa3ad13947bc50 # Parent bf5e41260f896987ccb5e03e62dc8df4b7671b12 Code ausgewaehlter Implementierungen eingefuegt Das Datum entspricht dem Dateiaenderungsdatum. diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__4.3bsd-reno.1990-06-25 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__4.3bsd-reno.1990-06-25 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,256 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms are permitted provided + * that: (1) source distributions retain this entire copyright notice and + * comment, and (2) distributions including binaries display the following + * acknowledgement: ``This product includes software developed by the + * University of California, Berkeley and its contributors'' in the + * documentation or other materials provided with the distribution and in + * all advertising materials mentioning features or use of this software. + * Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */
+
+#ifndef lint
+char copyright[] =
+"@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] = "@(#)cut.c 5.3 (Berkeley) 6/24/90";
+#endif /* not lint */
+
+#include /* NOTE(review): the header names of these three includes are missing in this copy */
+#include
+#include
+
+int cflag; /* set when -c list was given */
+char dchar; /* field delimiter used by f_cut(); main() defaults it to tab, -d overrides it */
+int dflag; /* set when -d was given */
+int fflag; /* set when -f list was given */
+int sflag; /* set when -s was given: f_cut() then drops lines without a delimiter */
+
+main(argc, argv) /* parse -c/-d/-f/-s, then run the selected cutter on every file operand, or on stdin if there is none */
+	int argc;
+	char **argv;
+{
+	extern char *optarg;
+	extern int errno, optind;
+	FILE *fp;
+	int ch, (*fcn)(), c_cut(), f_cut();
+	char *strerror();
+
+	dchar = '\t';		/* default delimiter is \t */
+
+	while ((ch = getopt(argc, argv, "c:d:f:s")) != EOF)
+		switch(ch) {
+		case 'c':
+			fcn = c_cut; /* column mode */
+			get_list(optarg);
+			cflag = 1;
+			break;
+		case 'd':
+			dchar = *optarg; /* only the first character of the argument is used */
+			dflag = 1;
+			break;
+		case 'f':
+			get_list(optarg);
+			fcn = f_cut; /* field mode */
+			fflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	if (fflag) { /* -c and -f are mutually exclusive */
+		if (cflag)
+			usage();
+	} else if (!cflag || dflag || sflag) /* one of -c/-f is required; -d and -s are only accepted together with -f */
+		usage();
+
+	if (*argv)
+		for (; *argv; ++argv) {
+			if (!(fp = fopen(*argv, "r"))) {
+				(void)fprintf(stderr,
+				    "cut: %s: %s\n", *argv, strerror(errno));
+				exit(1);
+			}
+			fcn(fp, *argv); /* NOTE(review): fp is never closed in this version */
+		}
+	else
+		fcn(stdin, "stdin");
+	exit(0);
+}
+
+int autostart, autostop, maxval; /* autostart: largest N of an item written -N; autostop: smallest bound of an open-ended item (0 if none); maxval: highest position the cutters walk explicitly */
+
+char positions[_BSD_LINE_MAX + 1]; /* selection map indexed by 1-based column or field number; nonzero means selected */
+
+get_list(list) /* parse the -c/-f list argument and record the selection in positions[], autostart, autostop and maxval */
+	char *list;
+{
+	register char *pos;
+	register int setautostart, start, stop;
+	char *p, *strtok();
+
+	/*
+	 * set a byte in the positions array to indicate if a field or
+	 * column is to be selected; use +1, it's 1-based, not 0-based.
+	 * This parser is less restrictive than the Draft 9 POSIX spec.
+	 * POSIX doesn't allow lists that aren't in increasing order or
+	 * overlapping lists. We also handle "-3-5" although there's no
+	 * real reason to.
+	 */
+	for (; p = strtok(list, ", \t"); list = NULL) { /* one list item per iteration; items are split on comma, blank or tab */
+		setautostart = start = stop = 0;
+		if (*p == '-') { /* leading dash: item of the form -N */
+			++p;
+			setautostart = 1;
+		}
+		if (isdigit(*p)) {
+			start = stop = strtol(p, &p, 10);
+			if (setautostart && start > autostart)
+				autostart = start;
+		}
+		if (*p == '-') { /* a dash after the first number: N-M or N- */
+			if (isdigit(p[1]))
+				stop = strtol(p + 1, &p, 10);
+			if (*p == '-') { /* dash still pending: open-ended item, keep the smallest such bound */
+				++p;
+				if (!autostop || autostop > stop)
+					autostop = stop;
+			}
+		}
+		if (*p)
+			badlist("illegal list value");
+		if (!stop || !start)
+			badlist("values may not include zero");
+		if (stop > _BSD_LINE_MAX) {
+			/* positions used rather than allocate a new buffer */
+			(void)sprintf(positions, "%d too large (max %d)",
+			    stop, _BSD_LINE_MAX);
+			badlist(positions);
+		}
+		if (maxval < stop)
+			maxval = stop;
+		for (pos = positions + start; start++ <= stop; *pos++ = 1); /* mark start..stop inclusive; does nothing when start > stop */
+	}
+
+	/* overlapping ranges */
+	if (autostop && maxval > autostop)
+		maxval = autostop;
+
+	/* set autostart */
+	if (autostart)
+		memset(positions + 1, '1', autostart); /* marks positions 1..autostart. NOTE(review): only stop is range-checked above, so an item like -N-M with N > M leaves autostart = N unchecked -- verify this cannot write past positions[] */
+}
+
+/* ARGSUSED */
+c_cut(fp, fname) /* column mode: for each input line print the selected characters and a newline; fname is unused */
+	FILE *fp;
+	char *fname;
+{
+	register int ch, col;
+	register char *pos;
+
+	for (;;) {
+		pos = positions + 1;
+		for (col = maxval; col; --col) { /* at most the first maxval characters are checked against positions[] */
+			if ((ch = getc(fp)) == EOF)
+				return;
+			if (ch == '\n')
+				break;
+			if (*pos++)
+				putchar(ch);
+		}
+		if (ch != '\n') /* line is longer than maxval: echo the remainder when an open-ended item was given, otherwise discard it */
+			if (autostop)
+				while ((ch = getc(fp)) != EOF && ch != '\n')
+					putchar(ch);
+			else /* pairs with if (autostop) */
+				while ((ch = getc(fp)) != EOF && ch != '\n');
+		putchar('\n');
+	}
+}
+
+f_cut(fp, fname) /* field mode: for each input line print the selected dchar-separated fields; fname is used in the error message */
+	FILE *fp;
+	char *fname;
+{
+	register int ch, field, isdelim;
+	register char *pos, *p, sep;
+	int output;
+	char lbuf[_BSD_LINE_MAX + 1];
+
+	for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp);) { /* NOTE(review): output is cleared only here, once; after the first delimited line it stays nonzero, so later lines get a separator before their first selected field -- confirm */
+		for (isdelim = 0, p = lbuf;; ++p) { /* first pass: find out whether the line holds a delimiter and locate its newline */
+			if (!(ch = *p)) { /* NUL before newline: fgets() did not deliver a complete line */
+				(void)fprintf(stderr,
+				    "cut: %s: line too long.\n", fname);
+				exit(1);
+			}
+			/* this should work if newline is delimiter */
+			if (ch == sep)
+				isdelim = 1;
+			if (ch == '\n') {
+				if (!isdelim && !sflag) /* lines without a delimiter are passed through unless -s was given */
+					(void)printf("%s", lbuf);
+				break;
+			}
+		}
+		if (!isdelim)
+			continue;
+
+		pos = positions + 1;
+		for (field = maxval, p = lbuf; field; --field, ++pos) { /* second pass: walk at most maxval fields */
+			if (*pos) {
+				if (output++)
+					putchar(sep);
+				while ((ch = *p++) != '\n' && ch != sep)
+					putchar(ch);
+			} else
+				while ((ch = *p++) != '\n' && ch != sep); /* skip an unselected field */
+			if (ch == '\n')
+				break;
+		}
+		if (ch != '\n') /* fields remain beyond maxval: echo them when an open-ended item was given, otherwise skip them */
+			if (autostop) {
+				if (output)
+					putchar(sep);
+				for (; (ch = *p) != '\n'; ++p)
+					putchar(ch);
+			} else
+				for (; (ch = *p) != '\n'; ++p);
+		putchar('\n');
+	}
+}
+
+badlist(msg) /* report a list syntax error on stderr and exit with status 1 */
+	char *msg;
+{
+	(void)fprintf(stderr, "cut: [-cf] list: %s.\n", msg);
+	exit(1);
+}
+
+usage() /* print the synopsis on stderr and exit with status 1 */
+{
+	(void)fprintf(stderr,
+"usage:\tcut -c list [file1 ...]\n\tcut -f list [-s] [-d delim] [file ...]\n");
+	exit(1);
+}
diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__4.3bsd-uwisc.1986-11-07
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/code/cut.c__4.3bsd-uwisc.1986-11-07 Tue May 12 06:46:59 2015 +0200
@@ -0,0 +1,125 @@
+static char sccsid[] = "@(#)cut.c 1.3";
+#
+/* cut : cut and paste columns of a table (projection of a relation) */
+/* Release 1.5; handles single backspaces as produced by nroff */
+# include /* make: cc cut.c -- NOTE(review): the header name is missing in this copy */
+# define NFIELDS 512 /* max no of fields or resulting line length */
+# define BACKSPACE 8
+main(argc, argv) /* hand-rolled option parser followed by the per-file, per-line, per-character cutting loops */
+int argc; char **argv;
+{
+	int del = '\t'; /* field delimiter, replaced by the character following -d */
+	int i, j, count, poscnt, r, s, t; /* r: a dash was seen in the current list item; s: range start; t: number being accumulated */
+	int endflag, supflag, cflag, fflag, backflag, filenr;
+	int sel[NFIELDS]; /* selection table tested as sel[poscnt] in the line loop */
+	register int c;
+	register char *p1;
+	char *p2, outbuf[NFIELDS];
+	FILE *inptr;
+	endflag = supflag = cflag = fflag = 0;
+
+
+while (argc > 1 && argv[1][0] == '-'){ /* consume every leading argument that starts with a dash */
+	for (i = 1; (c = argv[1][i]) != '\0'; i++) {
+		switch(c) {
+			case 'd' : del = argv[1][++i];
+				if (del == '\0') diag("no delimiter\n");
+				break;
+			case 's': supflag++ ;
+				break;
+			case 'c': cflag++ ;
+				break;
+			case 'f': fflag++ ;
+				break;
+			default : diag("Usage: cut [-s] [-d] {-c | -f} file ...\n");
+				break;
+		}
+		if (!endflag && (cflag || fflag)) { /* the list follows the c or f letter inside the same argument */
+			endflag = 1;
+			r = s = t = 0;
+			do {	c = argv[1][++i];
+				switch(c) {
+					case '-' : if (r) diagl();
+						r = 1;
+						if (t == 0) s = 1;
+						else {s = t; t = 0;}
+						continue;
+					case '\0' :
+					case ',' : if (t >= NFIELDS) diagl();
+						if (r) { if (t == 0) t = NFIELDS - 1;
+							if (t 0 ? 1 : 0); /* NOTE(review): this statement is truncated in this copy; text between the two operands appears to be lost -- restore it from the original file before relying on this listing */
+						r = s = t = 0;
+						if (c == '\0') {i--; break;}
+						continue;
+					default :
+						if (c< '0' || c> '9') diagl();
+						t = 10*t + c - '0';
+						continue;
+				}
+				for (j = t = 0; j < NFIELDS; j++) t += sel[j]; /* sum the table to detect an empty selection */
+				if (t == 0) diag("no fields\n");
+			} while (c != '\0');
+		}
+	}
+	--argc;
+	++argv;
+} /* end options */
+if (!(cflag || fflag)) diagl();
+
+--argc;
+filenr = 1;
+do {	/* for all input files */
+	if (argc > 0) inptr = fopen(argv[filenr], "r");
+	else inptr = stdin;
+
+	if (inptr == NULL) {
+		write(2,"Cannot open :",14); /* NOTE(review): the text has 13 characters, so a length of 14 also writes the terminating NUL */
+		diag(argv[filenr]);
+	}
+	endflag = 0;
+	do {	/* for all lines of a file */
+		count = poscnt = backflag = 0;
+		p1 = &outbuf[0] - 1 ; /* p1: last byte stored in outbuf; p2: last byte known to be kept */
+		p2 = p1;
+		do {	/* for all char of the line */
+			c = fgetc(inptr);
+			if (c == EOF) {
+				endflag = 1;
+				break;
+			}
+			if (count == NFIELDS - 1) diag("line too long\n");
+			if (c != '\n') *++p1 = c;
+			if (cflag && (c == BACKSPACE)) backflag++ ; else
+				{ if ( !backflag ) poscnt += 1 ; else backflag-- ;}
+			if ( backflag > 1 ) diag("cannot handle multiple adjacent backspaces\n");
+			if ( ((c == '\n') && count > 0) || c == del || cflag) { /* a column (every character with -c) or a field (at a delimiter or newline) has ended */
+				count += 1;
+				if (fflag) poscnt = count ;
+				if (sel[poscnt]) p2 = p1; else p1 = p2; /* keep the bytes just stored, or roll back to the last kept byte */
+			}
+		}while (c != '\n');
+		if ( !endflag && (count > 0 || !supflag)) {
+			if (*p1 == del) *p1 = '\0';
+			else *++p1 = '\0'; /*suppress trailing delimiter*/
+			puts(outbuf);
+		}
+	} while (!endflag) ;
+fclose(inptr);
+} while(++filenr <= argc);
+}
+
+diag(s) /* write the prefix and the message s to file descriptor 2, one byte per write() call, then exit with status 2 */
+char *s;
+{
+	write(2, "cut : ", 6);
+	while(*s)
+		write(2,s++,1);
+	exit(2);
+}
+diagl() /* report a malformed list through diag() */
+{
+diag("bad list for c/f option\n");
+}
diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__freebsd.1994-05-27
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/code/cut.c__freebsd.1994-05-27 Tue May 12 06:46:59 2015 +0200
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#ifndef lint
+static char copyright[] =
+"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static char sccsid[] = "@(#)cut.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+#include /* NOTE(review): the header names of these six includes are missing in this copy */
+#include
+#include
+#include
+#include
+#include
+
+int cflag; /* set when -c list was given */
+char dchar; /* field delimiter used by f_cut(); main() defaults it to tab, -d overrides it */
+int dflag; /* set when -d was given */
+int fflag; /* set when -f list was given */
+int sflag; /* set when -s was given: f_cut() then drops lines without a delimiter */
+
+void c_cut __P((FILE *, char *));
+void err __P((const char *, ...));
+void f_cut __P((FILE *, char *));
+void get_list __P((char *));
+void usage __P((void));
+
+int
+main(argc, argv) /* parse -c/-d/-f/-s, then run the selected cutter on every file operand, or on stdin if there is none */
+	int argc;
+	char *argv[];
+{
+	FILE *fp;
+	void (*fcn) __P((FILE *, char *));
+	int ch;
+
+	dchar = '\t';		/* default delimiter is \t */
+
+	while ((ch = getopt(argc, argv, "c:d:f:s")) != EOF)
+		switch(ch) {
+		case 'c':
+			fcn = c_cut; /* column mode */
+			get_list(optarg);
+			cflag = 1;
+			break;
+		case 'd':
+			dchar = *optarg; /* only the first character of the argument is used */
+			dflag = 1;
+			break;
+		case 'f':
+			get_list(optarg);
+			fcn = f_cut; /* field mode */
+			fflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	if (fflag) { /* -c and -f are mutually exclusive */
+		if (cflag)
+			usage();
+	} else if (!cflag || dflag || sflag) /* one of -c/-f is required; -d and -s are only accepted together with -f */
+		usage();
+
+	if (*argv)
+		for (; *argv; ++argv) {
+			if (!(fp = fopen(*argv, "r")))
+				err("%s: %s\n", *argv, strerror(errno)); /* err() does not return */
+			fcn(fp, *argv);
+			(void)fclose(fp);
+		}
+	else
+		fcn(stdin, "stdin");
+	exit(0);
+}
+
+int autostart, autostop, maxval; /* autostart: largest N of an item written -N; autostop: smallest bound of an open-ended item (0 if none); maxval: highest position the cutters walk explicitly */
+
+char positions[_POSIX2_LINE_MAX + 1]; /* selection map indexed by 1-based column or field number; nonzero means selected */
+
+void
+get_list(list) /* parse the -c/-f list argument and record the selection in positions[], autostart, autostop and maxval */
+	char *list;
+{
+	register int setautostart, start, stop;
+	register char *pos;
+	char *p;
+
+	/*
+	 * set a byte in the positions array to indicate if a field or
+	 * column is to be selected; use +1, it's 1-based, not 0-based.
+	 * This parser is less restrictive than the Draft 9 POSIX spec.
+	 * POSIX doesn't allow lists that aren't in increasing order or
+	 * overlapping lists. We also handle "-3-5" although there's no
+	 * real reason to.
+	 */
+	for (; p = strtok(list, ", \t"); list = NULL) { /* one list item per iteration; items are split on comma, blank or tab */
+		setautostart = start = stop = 0;
+		if (*p == '-') { /* leading dash: item of the form -N */
+			++p;
+			setautostart = 1;
+		}
+		if (isdigit(*p)) {
+			start = stop = strtol(p, &p, 10);
+			if (setautostart && start > autostart)
+				autostart = start;
+		}
+		if (*p == '-') { /* a dash after the first number: N-M or N- */
+			if (isdigit(p[1]))
+				stop = strtol(p + 1, &p, 10);
+			if (*p == '-') { /* dash still pending: open-ended item, keep the smallest such bound */
+				++p;
+				if (!autostop || autostop > stop)
+					autostop = stop;
+			}
+		}
+		if (*p)
+			err("[-cf] list: illegal list value\n");
+		if (!stop || !start)
+			err("[-cf] list: values may not include zero\n");
+		if (stop > _POSIX2_LINE_MAX)
+			err("[-cf] list: %d too large (max %d)\n",
+			    stop, _POSIX2_LINE_MAX);
+		if (maxval < stop)
+			maxval = stop;
+		for (pos = positions + start; start++ <= stop; *pos++ = 1); /* mark start..stop inclusive; does nothing when start > stop */
+	}
+
+	/* overlapping ranges */
+	if (autostop && maxval > autostop)
+		maxval = autostop;
+
+	/* set autostart */
+	if (autostart)
+		memset(positions + 1, '1', autostart); /* marks positions 1..autostart. NOTE(review): only stop is range-checked above, so an item like -N-M with N > M leaves autostart = N unchecked -- verify this cannot write past positions[] */
+}
+
+/* ARGSUSED */
+void
+c_cut(fp, fname) /* column mode: for each input line print the selected characters and a newline; fname is unused */
+	FILE *fp;
+	char *fname;
+{
+	register int ch, col;
+	register char *pos;
+
+	for (;;) {
+		pos = positions + 1;
+		for (col = maxval; col; --col) { /* at most the first maxval characters are checked against positions[] */
+			if ((ch = getc(fp)) == EOF)
+				return;
+			if (ch == '\n')
+				break;
+			if (*pos++)
+				(void)putchar(ch);
+		}
+		if (ch != '\n') /* line is longer than maxval: echo the remainder when an open-ended item was given, otherwise discard it */
+			if (autostop)
+				while ((ch = getc(fp)) != EOF && ch != '\n')
+					(void)putchar(ch);
+			else /* pairs with if (autostop) */
+				while ((ch = getc(fp)) != EOF && ch != '\n');
+		(void)putchar('\n');
+	}
+}
+
+void
+f_cut(fp, fname) /* field mode: for each input line print the selected dchar-separated fields; fname is used in the error message */
+	FILE *fp;
+	char *fname;
+{
+	register int ch, field, isdelim;
+	register char *pos, *p, sep;
+	int output;
+	char lbuf[_POSIX2_LINE_MAX + 1];
+
+	for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp);) { /* NOTE(review): output is cleared only here, once; after the first delimited line it stays nonzero, so later lines get a separator before their first selected field -- confirm */
+		for (isdelim = 0, p = lbuf;; ++p) { /* first pass: find out whether the line holds a delimiter and locate its newline */
+			if (!(ch = *p)) /* NUL before newline: fgets() did not deliver a complete line */
+				err("%s: line too long.\n", fname);
+			/* this should work if newline is delimiter */
+			if (ch == sep)
+				isdelim = 1;
+			if (ch == '\n') {
+				if (!isdelim && !sflag) /* lines without a delimiter are passed through unless -s was given */
+					(void)printf("%s", lbuf);
+				break;
+			}
+		}
+		if (!isdelim)
+			continue;
+
+		pos = positions + 1;
+		for (field = maxval, p = lbuf; field; --field, ++pos) { /* second pass: walk at most maxval fields */
+			if (*pos) {
+				if (output++)
+					(void)putchar(sep);
+				while ((ch = *p++) != '\n' && ch != sep)
+					(void)putchar(ch);
+			} else
+				while ((ch = *p++) != '\n' && ch != sep); /* skip an unselected field */
+			if (ch == '\n')
+				break;
+		}
+		if (ch != '\n') /* fields remain beyond maxval: echo them when an open-ended item was given, otherwise skip them */
+			if (autostop) {
+				if (output)
+					(void)putchar(sep);
+				for (; (ch = *p) != '\n'; ++p)
+					(void)putchar(ch);
+			} else
+				for (; (ch = *p) != '\n'; ++p);
+		(void)putchar('\n');
+	}
+}
+
+void
+usage() /* print the synopsis on stderr and exit with status 1 */
+{
+	(void)fprintf(stderr,
+"usage:\tcut -c list [file1 ...]\n\tcut -f list [-s] [-d delim] [file ...]\n");
+	exit(1);
+}
+
+#if __STDC__
+#include
+#else
+#include
+#endif
+
+void
+#if __STDC__
+err(const char *fmt, ...) /* print the prefix, the formatted message and a newline on stderr, then exit with status 1 */
+#else
+err(fmt, va_alist)
+	char *fmt;
+	va_dcl
+#endif
+{
+	va_list ap;
+#if __STDC__
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif
+	(void)fprintf(stderr, "cut: ");
+	(void)vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	(void)fprintf(stderr, "\n"); /* NOTE(review): every caller in this file already ends its format with a newline, so messages are followed by an empty line */
+	exit(1);
+	/* NOTREACHED */
+}
diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__freebsd.2012-11-24
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/code/cut.c__freebsd.2012-11-24 Tue May 12 06:46:59 2015 +0200
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4.
Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. 
All rights reserved.\n";
+static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
+#endif /* not lint */
+#include /* NOTE(review): the header names of all includes in this file are missing in this copy */
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static int bflag; /* set when -b list was given */
+static int cflag; /* set when -c list was given */
+static wchar_t dchar; /* field delimiter as a wide character; defaults to tab */
+static char dcharmb[MB_LEN_MAX + 1]; /* the same delimiter as a multibyte string, written between output fields */
+static int dflag; /* set when -d was given */
+static int fflag; /* set when -f list was given */
+static int nflag; /* set when -n was given */
+static int sflag; /* set when -s was given */
+static int wflag; /* set when -w was given: is_delim() then treats blank and tab as delimiters */
+
+static size_t autostart, autostop, maxval; /* autostart: largest N of an item written -N; autostop: smallest bound of an open-ended item (0 if none); maxval: highest position the cutters walk explicitly */
+static char * positions; /* selection map indexed by 1-based position, grown by needpos(); nonzero means selected */
+
+static int b_cut(FILE *, const char *);
+static int b_n_cut(FILE *, const char *);
+static int c_cut(FILE *, const char *);
+static int f_cut(FILE *, const char *);
+static void get_list(char *);
+static int is_delim(wchar_t);
+static void needpos(size_t);
+static void usage(void);
+
+int
+main(int argc, char *argv[]) /* parse the options, pick the cutter, run it on every operand (or stdin) and exit with the accumulated status */
+{
+	FILE *fp;
+	int (*fcn)(FILE *, const char *);
+	int ch, rval;
+	size_t n;
+
+	setlocale(LC_ALL, "");
+
+	fcn = NULL;
+	dchar = '\t';			/* default delimiter is \t */
+	strcpy(dcharmb, "\t");
+
+	while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
+		switch(ch) {
+		case 'b':
+			get_list(optarg);
+			bflag = 1;
+			break;
+		case 'c':
+			get_list(optarg);
+			cflag = 1;
+			break;
+		case 'd':
+			n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
+			if (dchar == '\0' || n != strlen(optarg)) /* the argument must be exactly one non-NUL multibyte character */
+				errx(1, "bad delimiter");
+			strcpy(dcharmb, optarg);
+			dflag = 1;
+			break;
+		case 'f':
+			get_list(optarg);
+			fflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		case 'w':
+			wflag = 1;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	argc -= optind;
+	argv += optind;
+
+	if (fflag) { /* -f excludes -b, -c and -n, and -w cannot be combined with -d */
+		if (bflag || cflag || nflag || (wflag && dflag))
+			usage();
+	} else if (!(bflag || cflag) || dflag || sflag || wflag) /* without -f one of -b/-c is required, and -d, -s, -w are rejected */
+		usage();
+	else if (!bflag && nflag) /* -n is only accepted together with -b */
+		usage();
+
+	if (fflag)
+		fcn = f_cut;
+	else if (cflag)
+		fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; /* the byte cutter is used for -c when the locale is single-byte */
+	else if (bflag)
+		fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
+
+	rval = 0;
+	if (*argv)
+		for (; *argv; ++argv) {
+			if (strcmp(*argv, "-") == 0) /* a single dash as operand selects standard input */
+				rval |= fcn(stdin, "stdin");
+			else {
+				if (!(fp = fopen(*argv, "r"))) {
+					warn("%s", *argv);
+					rval = 1;
+					continue;
+				}
+				fcn(fp, *argv); /* NOTE(review): unlike the two stdin paths, the return value is dropped here, so a failure reported by the cutter does not reach rval */
+				(void)fclose(fp);
+			}
+		}
+	else
+		rval = fcn(stdin, "stdin");
+	exit(rval);
+}
+
+static void
+get_list(char *list) /* parse the -b/-c/-f list argument and record the selection in positions, autostart, autostop and maxval */
+{
+	size_t setautostart, start, stop;
+	char *pos;
+	char *p;
+
+	/*
+	 * set a byte in the positions array to indicate if a field or
+	 * column is to be selected; use +1, it's 1-based, not 0-based.
+	 * Numbers and number ranges may be overlapping, repeated, and in
+	 * any order. We handle "-3-5" although there's no real reason to.
+	 */
+	for (; (p = strsep(&list, ", \t")) != NULL;) { /* one list item per iteration; items are split on comma, blank or tab */
+		setautostart = start = stop = 0;
+		if (*p == '-') { /* leading dash: item of the form -N */
+			++p;
+			setautostart = 1;
+		}
+		if (isdigit((unsigned char)*p)) {
+			start = stop = strtol(p, &p, 10);
+			if (setautostart && start > autostart)
+				autostart = start;
+		}
+		if (*p == '-') { /* a dash after the first number: N-M or N- */
+			if (isdigit((unsigned char)p[1]))
+				stop = strtol(p + 1, &p, 10);
+			if (*p == '-') { /* dash still pending: open-ended item, keep the smallest such bound */
+				++p;
+				if (!autostop || autostop > stop)
+					autostop = stop;
+			}
+		}
+		if (*p)
+			errx(1, "[-bcf] list: illegal list value");
+		if (!stop || !start)
+			errx(1, "[-bcf] list: values may not include zero");
+		if (maxval < stop) {
+			maxval = stop;
+			needpos(maxval + 1); /* index maxval must be addressable, hence one extra byte */
+		}
+		for (pos = positions + start; start++ <= stop; *pos++ = 1); /* mark start..stop inclusive; does nothing when start > stop */
+	}
+
+	/* overlapping ranges */
+	if (autostop && maxval > autostop) {
+		maxval = autostop;
+		needpos(maxval + 1);
+	}
+
+	/* set autostart */
+	if (autostart)
+		memset(positions + 1, '1', autostart); /* marks positions 1..autostart. NOTE(review): positions was only grown for maxval; an item like -N-M with N > M sets autostart = N with maxval = M -- verify this cannot write past the allocation */
+}
+
+static void
+needpos(size_t n) /* make sure positions holds at least n bytes; newly added bytes are zeroed */
+{
+	static size_t npos; /* current allocated size of positions */
+	size_t oldnpos;
+
+	/* Grow the positions array to at least the specified size. */
+	if (n > npos) {
+		oldnpos = npos;
+		if (npos == 0)
+			npos = n;
+		while (n > npos) /* grow by doubling */
+			npos *= 2;
+		if ((positions = realloc(positions, npos)) == NULL)
+			err(1, "realloc");
+		memset((char *)positions + oldnpos, 0, npos - oldnpos);
+	}
+}
+
+static int
+b_cut(FILE *fp, const char *fname __unused) /* byte mode: for each input line print the selected bytes and a newline; always returns 0 */
+{
+	int ch, col;
+	char *pos;
+
+	ch = 0;
+	for (;;) {
+		pos = positions + 1;
+		for (col = maxval; col; --col) { /* at most the first maxval bytes are checked against positions */
+			if ((ch = getc(fp)) == EOF)
+				return (0);
+			if (ch == '\n')
+				break;
+			if (*pos++)
+				(void)putchar(ch);
+		}
+		if (ch != '\n') { /* line is longer than maxval: echo the remainder when an open-ended item was given, otherwise discard it */
+			if (autostop)
+				while ((ch = getc(fp)) != EOF && ch != '\n')
+					(void)putchar(ch);
+			else
+				while ((ch = getc(fp)) != EOF && ch != '\n');
+		}
+		(void)putchar('\n');
+	}
+	return (0);
+}
+
+/*
+ * Cut based on byte positions, taking care not to split multibyte characters.
+ * Although this function also handles the case where -n is not specified,
+ * b_cut() ought to be much faster.
+ * Returns 1 if a conversion problem was reported for this file, 0 otherwise.
+ */
+static int
+b_n_cut(FILE *fp, const char *fname)
+{
+	size_t col, i, lbuflen;
+	char *lbuf;
+	int canwrite, clen, warned;
+	mbstate_t mbs;
+
+	memset(&mbs, 0, sizeof(mbs));
+	warned = 0;
+	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
+		for (col = 0; lbuflen > 0; col += clen) { /* col is the 0-based byte offset of the current character within the line */
+			if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { /* on a conversion error: warn once per file, reset the state and treat the byte as one character */
+				if (!warned) {
+					warn("%s", fname);
+					warned = 1;
+				}
+				memset(&mbs, 0, sizeof(mbs));
+				clen = 1;
+			}
+			if (clen == 0 || *lbuf == '\n')
+				break;
+			if (col < maxval && !positions[1 + col]) {
+				/*
+				 * Print the character if (1) after an initial
+				 * segment of un-selected bytes, the rest of
+				 * it is selected, and (2) the last byte is
+				 * selected.
+				 */
+				i = col;
+				while (i < col + clen && i < maxval &&
+				    !positions[1 + i])
+					i++;
+				canwrite = i < col + clen;
+				for (; i < col + clen && i < maxval; i++)
+					canwrite &= positions[1 + i];
+				if (canwrite)
+					fwrite(lbuf, 1, clen, stdout);
+			} else {
+				/*
+				 * Print the character if all of it has
+				 * been selected.
+				 */
+				canwrite = 1;
+				for (i = col; i < col + clen; i++)
+					if ((i >= maxval && !autostop) ||
+					    (i < maxval && !positions[1 + i])) {
+						canwrite = 0;
+						break;
+					}
+				if (canwrite)
+					fwrite(lbuf, 1, clen, stdout);
+			}
+			lbuf += clen;
+			lbuflen -= clen;
+		}
+		if (lbuflen > 0) /* the inner loop stopped early, i.e. at the newline or at a zero-length character */
+			putchar('\n');
+	}
+	return (warned);
+}
+
+static int
+c_cut(FILE *fp, const char *fname) /* character mode on wide characters; returns 1 if the stream is in error state at the end, 0 otherwise */
+{
+	wint_t ch;
+	int col;
+	char *pos;
+
+	ch = 0;
+	for (;;) {
+		pos = positions + 1;
+		for (col = maxval; col; --col) { /* at most the first maxval characters are checked against positions */
+			if ((ch = getwc(fp)) == WEOF)
+				goto out;
+			if (ch == '\n')
+				break;
+			if (*pos++)
+				(void)putwchar(ch);
+		}
+		if (ch != '\n') { /* line is longer than maxval: echo the remainder when an open-ended item was given, otherwise discard it */
+			if (autostop)
+				while ((ch = getwc(fp)) != WEOF && ch != '\n')
+					(void)putwchar(ch);
+			else
+				while ((ch = getwc(fp)) != WEOF && ch != '\n');
+		}
+		(void)putwchar('\n');
+	}
+out:
+	if (ferror(fp)) { /* WEOF is returned both at end of file and on error; only the latter is reported */
+		warn("%s", fname);
+		return (1);
+	}
+	return (0);
+}
+
+static int
+is_delim(wchar_t ch) /* return 1 if ch separates fields: blank or tab under -w, dchar otherwise */
+{
+	if (wflag) {
+		if (ch == ' ' || ch == '\t')
+			return 1;
+	} else {
+		if (ch == dchar)
+			return 1;
+	}
+	return 0;
+}
+
+static int
+f_cut(FILE *fp, const char *fname) /* field mode: print the selected fields of each line; returns 1 on an invalid multibyte sequence, 0 otherwise */
+{
+	wchar_t ch;
+	int field, i, isdelim;
+	char *pos, *p;
+	int output;
+	char *lbuf, *mlbuf;
+	size_t clen, lbuflen, reallen;
+
+	mlbuf = NULL;
+	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
+		reallen = lbuflen;
+		/* Assert EOL has a newline. */
+		if (*(lbuf + lbuflen - 1) != '\n') {
+			/* Can't have > 1 line with no trailing newline. */
+			mlbuf = malloc(lbuflen + 1); /* private copy with room for the added newline; released at the end of the function */
+			if (mlbuf == NULL)
+				err(1, "malloc");
+			memcpy(mlbuf, lbuf, lbuflen);
+			*(mlbuf + lbuflen) = '\n';
+			lbuf = mlbuf;
+			reallen++;
+		}
+		output = 0; /* number of fields already written on this line */
+		for (isdelim = 0, p = lbuf;; p += clen) { /* first pass: find out whether the line holds a delimiter */
+			clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL);
+			if (clen == (size_t)-1 || clen == (size_t)-2) {
+				warnc(EILSEQ, "%s", fname);
+				free(mlbuf);
+				return (1);
+			}
+			if (clen == 0) /* a NUL character converts to length 0; step over its single byte */
+				clen = 1;
+			/* this should work if newline is delimiter */
+			if (is_delim(ch))
+				isdelim = 1;
+			if (ch == '\n') {
+				if (!isdelim && !sflag) /* lines without a delimiter are passed through unless -s was given */
+					(void)fwrite(lbuf, lbuflen, 1, stdout);
+				break;
+			}
+		}
+		if (!isdelim)
+			continue;
+
+		pos = positions + 1;
+		for (field = maxval, p = lbuf; field; --field, ++pos) { /* second pass: walk at most maxval fields */
+			if (*pos && output++) /* write the delimiter before every selected field except the first */
+				for (i = 0; dcharmb[i] != '\0'; i++)
+					putchar(dcharmb[i]);
+			for (;;) {
+				clen = mbrtowc(&ch, p, lbuf + reallen - p,
+				    NULL);
+				if (clen == (size_t)-1 || clen == (size_t)-2) {
+					warnc(EILSEQ, "%s", fname);
+					free(mlbuf);
+					return (1);
+				}
+				if (clen == 0)
+					clen = 1;
+				p += clen;
+				if (ch == '\n' || is_delim(ch)) {
+					/* compress whitespace */
+					if (wflag && ch != '\n')
+						while (is_delim(*p))
+							p++;
+					break;
+				}
+				if (*pos)
+					for (i = 0; i < (int)clen; i++)
+						putchar(p[i - clen]); /* p was already advanced, so this reaches back to the bytes of the character just decoded. NOTE(review): i is int and clen is size_t, so the index is computed in unsigned arithmetic -- presumably relies on wraparound, confirm */
+			}
+			if (ch == '\n')
+				break;
+		}
+		if (ch != '\n') { /* fields remain beyond maxval: echo them when an open-ended item was given, otherwise skip them */
+			if (autostop) {
+				if (output)
+					for (i = 0; dcharmb[i] != '\0'; i++)
+						putchar(dcharmb[i]);
+				for (; (ch = *p) != '\n'; ++p)
+					(void)putchar(ch);
+			} else
+				for (; (ch = *p) != '\n'; ++p);
+		}
+		(void)putchar('\n');
+	}
+	free(mlbuf);
+	return (0);
+}
+
+static void
+usage(void) /* print the synopsis on stderr and exit with status 1 */
+{
+	(void)fprintf(stderr, "%s\n%s\n%s\n",
+		"usage: cut -b list [-n] [file ...]",
+		" cut -c list [file ...]",
+		" cut -f list [-s] [-w | -d delim] [file ...]");
+	exit(1);
+}
diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__gnu.1992-11-08
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/code/cut.c__gnu.1992-11-08 Tue May 12 06:46:59 2015 +0200
@@ -0,0 +1,586 @@
+/* cut - remove parts of lines of files
+ Copyright (C) 1984
by David M. Ihnat
+
+ This program is a total rewrite of the Bell Laboratories Unix(Tm)
+ command of the same name, as of System V. It contains no proprietary
+ code, and therefore may be used without violation of any proprietary
+ agreements whatsoever. However, you will notice that the program is
+ copyrighted by me. This is to assure the program does *not* fall
+ into the public domain. Thus, I may specify just what I am now:
+ This program may be freely copied and distributed, provided this notice
+ remains; it may not be sold for profit without express written consent of
+ the author.
+ Please note that I recreated the behavior of the Unix(Tm) 'cut' command
+ as faithfully as possible; however, I haven't run a full set of regression
+ tests. Thus, the user of this program accepts full responsibility for any
+ effects or loss; in particular, the author is not responsible for any losses,
+ explicit or incidental, that may be incurred through use of this program.
+
+ I ask that any bugs (and, if possible, fixes) be reported to me when
+ possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
+
+ POSIX changes, bug fixes, long-named options, and cleanup
+ by David MacKenzie .
+
+ Options:
+ --bytes=byte-list
+ -b byte-list Print only the bytes in positions listed
+ in BYTE-LIST.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 byte.
+
+ --characters=character-list
+ -c character-list Print only characters in positions listed
+ in CHARACTER-LIST.
+ The same as -b for now, but
+ internationalization will change that.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 character.
+
+ --fields=field-list
+ -f field-list Print only the fields listed in FIELD-LIST.
+ Fields are separated by a TAB by default.
+
+ --delimiter=delim
+ -d delim For -f, fields are separated by the first
+ character in DELIM instead of TAB.
+
+ -n Do not split multibyte chars (no-op for now).
+
+ --only-delimited
+ -s For -f, do not print lines that do not contain
+ the field separator character.
+
+ The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
+ or ranges separated by commas. The first byte, character, and field
+ are numbered 1.
+
+ A FILE of `-' means standard input. */
+
+#define _GNU_SOURCE
+#include
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include
+#include
+#include
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+char *xmalloc ();
+char *xrealloc ();
+int set_fields ();
+int cut_file ();
+void cut_stream ();
+void cut_bytes ();
+void cut_fields ();
+void enlarge_line ();
+void error ();
+void invalid_list ();
+void usage ();
+
+/* The number of elements allocated for the input line
+ and the byte or field number.
+ Enlarged as necessary. */
+int line_size;
+
+/* Processed output buffer. */
+char *outbuf;
+
+/* Where to save next char to output. */
+char *outbufptr;
+
+/* Raw line buffer for field mode. */
+char *inbuf;
+
+/* Where to save next input char. */
+char *inbufptr;
+
+/* What can be done about a byte or field. */
+enum field_action
+{
+  field_omit,
+  field_output
+};
+
+/* In byte mode, which bytes to output.
+ In field mode, which `delim'-separated fields to output.
+ Both bytes and fields are numbered starting with 1,
+ so the first element of `fields' is unused. */
+enum field_action *fields;
+
+enum operating_mode
+{
+  undefined_mode,
+
+  /* Output characters that are in the given bytes. */
+  byte_mode,
+
+  /* Output the given delimiter-separated fields. */
+  field_mode
+};
+
+enum operating_mode operating_mode;
+
+/* If nonzero,
+ for field mode, do not output lines containing no delimiter characters. */
+int delimited_lines_only;
+
+/* The delimiter character for field mode. */
+unsigned char delim;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+struct option longopts[] = /* long option names mapped onto the short option letters handled in main */
+{
+  {"bytes", 1, 0, 'b'},
+  {"characters", 1, 0, 'c'},
+  {"fields", 1, 0, 'f'},
+  {"delimiter", 1, 0, 'd'},
+  {"only-delimited", 0, 0, 's'},
+  {0, 0, 0, 0}
+};
+
+void
+main (argc, argv) /* set up the buffers, parse the options, cut every operand (or standard input) and exit with the accumulated status. NOTE(review): declared void although it is the program entry point */
+     int argc;
+     char **argv;
+{
+  int optc, exit_status = 0;
+
+  program_name = argv[0];
+
+  line_size = 512; /* initial capacity of fields, outbuf and inbuf */
+  operating_mode = undefined_mode;
+  delimited_lines_only = 0;
+  delim = '\0'; /* NUL doubles as the marker that -d has not been given, see the checks after the option loop */
+  have_read_stdin = 0;
+
+  fields = (enum field_action *)
+    xmalloc (line_size * sizeof (enum field_action));
+  outbuf = (char *) xmalloc (line_size);
+  inbuf = (char *) xmalloc (line_size);
+
+  for (optc = 0; optc < line_size; optc++) /* optc is reused as a plain loop index here */
+    fields[optc] = field_omit;
+
+  while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
+	 != EOF)
+    {
+      switch (optc)
+	{
+	case 'b':
+	case 'c':
+	  /* Build the byte list. */
+	  if (operating_mode != undefined_mode) /* only one list option is accepted */
+	    usage ();
+	  operating_mode = byte_mode;
+	  if (set_fields (optarg) == 0)
+	    error (2, 0, "no fields given");
+	  break;
+
+	case 'f':
+	  /* Build the field list. */
+	  if (operating_mode != undefined_mode)
+	    usage ();
+	  operating_mode = field_mode;
+	  if (set_fields (optarg) == 0)
+	    error (2, 0, "no fields given");
+	  break;
+
+	case 'd':
+	  /* New delimiter. */
+	  if (optarg[0] == '\0')
+	    error (2, 0, "no delimiter given");
+	  if (optarg[1] != '\0')
+	    error (2, 0, "delimiter must be a single character");
+	  delim = optarg[0];
+	  break;
+
+	case 'n': /* accepted and ignored */
+	  break;
+
+	case 's':
+	  delimited_lines_only++;
+	  break;
+
+	default:
+	  usage ();
+	}
+    }
+
+  if (operating_mode == undefined_mode)
+    usage ();
+
+  if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode) /* -s and -d are only valid together with -f */
+    usage ();
+
+  if (delim == '\0')
+    delim = '\t';
+
+  if (optind == argc) /* no file operands: read standard input */
+    exit_status |= cut_file ("-");
+  else
+    for (; optind < argc; optind++)
+      exit_status |= cut_file (argv[optind]);
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    {
+      error (0, errno, "-");
+      exit_status = 1;
+    }
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (exit_status);
+}
+
+/* Select for printing the positions in `fields' that are listed in
+ byte or field specification FIELDSTR. FIELDSTR should be
+ composed of one or more numbers or ranges of numbers, separated by
+ blanks or commas. Incomplete ranges may be given: `-m' means
+ `1-m'; `n-' means `n' through end of line or last field.
+
+ Return the number of fields selected. */
+
+int
+set_fields (fieldstr)
+     char *fieldstr;
+{
+  int initial = 1;		/* Value of first number in a range. */
+  int dash_found = 0;		/* Nonzero if a '-' is found in this field. */
+  int value = 0;		/* If nonzero, a number being accumulated. */
+  int fields_selected = 0;	/* Number of fields selected so far. */
+  /* If nonzero, index of first field in a range that goes to end of line. */
+  int eol_range_start = 0;
+
+  for (;;) /* one character of FIELDSTR per iteration */
+    {
+      if (*fieldstr == '-')
+	{
+	  /* Starting a range. */
+	  if (dash_found)
+	    invalid_list ();
+	  dash_found++;
+	  fieldstr++;
+
+	  if (value)
+	    {
+	      if (value >= line_size)
+		enlarge_line (value);
+	      initial = value;
+	      value = 0;
+	    }
+	  else
+	    initial = 1; /* no number before the dash: the range starts at 1 */
+	}
+      else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
+	{
+	  /* Ending the string, or this field/byte sublist. */
+	  if (dash_found)
+	    {
+	      dash_found = 0;
+
+	      /* A range. Possibilities: -n, m-n, n-.
+		 In any case, `initial' contains the start of the range. */
+	      if (value == 0)
+		{
+		  /* `n-'. From `initial' to end of line. */
+		  eol_range_start = initial; /* NOTE(review): an earlier open-ended start is overwritten without comparison, so with two such items the later one wins even if it is larger -- verify */
+		  fields_selected++;
+		}
+	      else
+		{
+		  /* `m-n' or `-n' (1-n). */
+		  if (value < initial)
+		    invalid_list ();
+
+		  if (value >= line_size)
+		    enlarge_line (value);
+
+		  /* Is there already a range going to end of line? */
+		  if (eol_range_start != 0)
+		    {
+		      /* Yes. Is the new sequence already contained
+			 in the old one? If so, no processing is
+			 necessary. */
+		      if (initial < eol_range_start)
+			{
+			  /* No, the new sequence starts before the
+			     old. Does the old range going to end of line
+			     extend into the new range? */
+			  if (eol_range_start < value)
+			    /* Yes. Simply move the end of line marker. */
+			    eol_range_start = initial;
+			  else
+			    {
+			      /* No. A simple range, before and disjoint from
+				 the range going to end of line. Fill it. */
+			      for (; initial <= value; initial++)
+				fields[initial] = field_output;
+			    }
+
+			  /* In any case, some fields were selected. */
+			  fields_selected++;
+			}
+		    }
+		  else
+		    {
+		      /* There is no range going to end of line. */
+		      for (; initial <= value; initial++)
+			fields[initial] = field_output;
+		      fields_selected++;
+		    }
+		  value = 0;
+		}
+	    }
+	  else if (value != 0)
+	    {
+	      /* A simple field number, not a range. */
+	      if (value >= line_size)
+		enlarge_line (value);
+
+	      fields[value] = field_output;
+	      value = 0;
+	      fields_selected++;
+	    }
+
+	  if (*fieldstr == '\0')
+	    {
+	      /* If there was a range going to end of line, fill the
+		 array from the end of line point. */
+	      if (eol_range_start)
+		for (initial = eol_range_start; initial < line_size; initial++) /* fills only up to the current line_size; NOTE(review): presumably enlarge_line extends the fill when the array grows later -- it is not visible here, confirm */
+		  fields[initial] = field_output;
+
+	      return fields_selected;
+	    }
+
+	  fieldstr++;
+	}
+      else if (ISDIGIT (*fieldstr))
+	{
+	  value = 10 * value + *fieldstr - '0'; /* accumulate a decimal number; a value of 0 is indistinguishable from no number at all */
+	  fieldstr++;
+	}
+      else
+	invalid_list ();
+    }
+}
+
+/* Process file FILE to standard output.
+ Return 0 if successful, 1 if not. */ + +int +cut_file (file) + char *file; +{ + FILE *stream; + + if (!strcmp (file, "-")) + { + have_read_stdin = 1; + stream = stdin; + } + else + { + stream = fopen (file, "r"); + if (stream == NULL) + { + error (0, errno, "%s", file); + return 1; + } + } + + cut_stream (stream); + + if (ferror (stream)) + { + error (0, errno, "%s", file); + return 1; + } + if (!strcmp (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + { + error (0, errno, "%s", file); + return 1; + } + return 0; +} + +void +cut_stream (stream) + FILE *stream; +{ + if (operating_mode == byte_mode) + cut_bytes (stream); + else + cut_fields (stream); +} + +/* Print the file open for reading on stream STREAM + with the bytes marked `field_omit' in `fields' removed from each line. */ + +void +cut_bytes (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. */ + int char_count; /* Number of chars in the line so far. */ + + while (doneflag == 0) + { + /* Start processing a line. */ + outbufptr = outbuf; + char_count = 0; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + /* If this character is to be sent, stow it in the outbuffer. */ + + if (++char_count == line_size - 1) + enlarge_line (char_count); + + if (fields[char_count] == field_output || c == '\n') + *outbufptr++ = c; + } + while (c != '\n'); + + if (char_count) + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + } +} + +/* Print the file open for reading on stream STREAM + with the fields marked `field_omit' in `fields' removed from each line. + All characters are initially stowed in the raw input buffer, until + at least one field has been found. */ + +void +cut_fields (stream) + FILE *stream; +{ + register int c; /* Each character from the file. */ + int doneflag = 0; /* Nonzero if EOF reached. 
*/ + int char_count; /* Number of chars in line before any delim. */ + int fieldfound; /* Nonzero if any fields to print found. */ + int curr_field; /* Current index in `fields'. */ + + while (doneflag == 0) + { + char_count = 0; + fieldfound = 0; + curr_field = 1; + outbufptr = outbuf; + inbufptr = inbuf; + + do + { + c = getc (stream); + if (c == EOF) + { + doneflag++; + break; + } + + if (fields[curr_field] == field_output && c != '\n') + { + /* Working on a field. It, and its terminating + delimiter, go only into the processed buffer. */ + fieldfound = 1; + if (outbufptr - outbuf == line_size - 2) + enlarge_line (outbufptr - outbuf); + *outbufptr++ = c; + } + else if (fieldfound == 0) + { + if (++char_count == line_size - 1) + enlarge_line (char_count); + *inbufptr++ = c; + } + + if (c == delim && ++curr_field == line_size - 1) + enlarge_line (curr_field); + } + while (c != '\n'); + + if (fieldfound) + { + /* Something was found. Print it. */ + if (outbufptr[-1] == delim) + --outbufptr; /* Suppress trailing delimiter. */ + + fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout); + if (c == '\n') + putc (c, stdout); + } + else if (!delimited_lines_only && char_count) + /* A line with some characters, no delimiters, and no + suppression. Print it. */ + fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout); + } +} + +/* Extend the buffers to accomodate at least NEW_SIZE characters. */ + +void +enlarge_line (new_size) + int new_size; +{ + char *newp; + int i; + + new_size += 256; /* Leave some room to grow. 
*/ + + fields = (enum field_action *) + xrealloc (fields, new_size * sizeof (enum field_action)); + + newp = (char *) xrealloc (outbuf, new_size); + outbufptr += newp - outbuf; + outbuf = newp; + + newp = (char *) xrealloc (inbuf, new_size); + inbufptr += newp - inbuf; + inbuf = newp; + + for (i = line_size; i < new_size; i++) + fields[i] = field_omit; + line_size = new_size; +} + +void +invalid_list () +{ + error (2, 0, "invalid byte or field list"); +} + +void +usage () +{ + fprintf (stderr, "\ +Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\ + %s {-c character-list,--characters=character-list} [file...]\n\ + %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\ + [--delimiter=delim] [--only-delimited] [file...]\n", + program_name, program_name, program_name); + exit (2); +} diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__gnu.2015-05-01 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__gnu.2015-05-01 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,830 @@ +/* cut - remove parts of lines of files + Copyright (C) 1997-2015 Free Software Foundation, Inc. + Copyright (C) 1984 David M. Ihnat + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David Ihnat. */ + +/* POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie . + + Rewrite cut_fields and cut_bytes -- Jim Meyering. 
*/ + +#include + +#include +#include +#include +#include +#include "system.h" + +#include "error.h" +#include "fadvise.h" +#include "getndelim2.h" +#include "hash.h" +#include "quote.h" +#include "xstrndup.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "cut" + +#define AUTHORS \ + proper_name ("David M. Ihnat"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +#define FATAL_ERROR(Message) \ + do \ + { \ + error (0, 0, (Message)); \ + usage (EXIT_FAILURE); \ + } \ + while (0) + + +struct range_pair + { + size_t lo; + size_t hi; + }; + +/* Array of `struct range_pair' holding all the finite ranges. */ +static struct range_pair *rp; + +/* Pointer inside RP. When checking if a byte or field is selected + by a finite range, we check if it is between CURRENT_RP.LO + and CURRENT_RP.HI. If the byte or field index is greater than + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ +static struct range_pair *current_rp; + +/* Number of finite ranges specified by the user. */ +static size_t n_rp; + +/* Number of `struct range_pair's allocated. */ +static size_t n_rp_allocated; + + +/* Append LOW, HIGH to the list RP of range pairs, allocating additional + space if necessary. Update global variable N_RP. When allocating, + update global variable N_RP_ALLOCATED. */ + +static void +add_range_pair (size_t lo, size_t hi) +{ + if (n_rp == n_rp_allocated) + rp = X2NREALLOC (rp, &n_rp_allocated); + rp[n_rp].lo = lo; + rp[n_rp].hi = hi; + ++n_rp; +} + +/* This buffer is used to support the semantics of the -s option + (or lack of same) when the specified field list includes (does + not include) the first field. In both of those cases, the entire + first field must be read into this buffer to determine whether it + is followed by a delimiter or a newline before any of it may be + output. Otherwise, cut_fields can do the job without using this + buffer. 
*/ +static char *field_1_buffer; + +/* The number of bytes allocated for FIELD_1_BUFFER. */ +static size_t field_1_bufsize; + +enum operating_mode + { + undefined_mode, + + /* Output characters that are in the given bytes. */ + byte_mode, + + /* Output the given delimiter-separated fields. */ + field_mode + }; + +static enum operating_mode operating_mode; + +/* If true do not output lines containing no delimiter characters. + Otherwise, all such lines are printed. This option is valid only + with field mode. */ +static bool suppress_non_delimited; + +/* If true, print all bytes, characters, or fields _except_ + those that were specified. */ +static bool complement; + +/* The delimiter character for field mode. */ +static unsigned char delim; + +/* True if the --output-delimiter=STRING option was specified. */ +static bool output_delimiter_specified; + +/* The length of output_delimiter_string. */ +static size_t output_delimiter_length; + +/* The output field separator string. Defaults to the 1-character + string consisting of the input delimiter. */ +static char *output_delimiter_string; + +/* True if we have ever read standard input. */ +static bool have_read_stdin; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. 
*/ +enum +{ + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, + COMPLEMENT_OPTION +}; + +static struct option const longopts[] = +{ + {"bytes", required_argument, NULL, 'b'}, + {"characters", required_argument, NULL, 'c'}, + {"fields", required_argument, NULL, 'f'}, + {"delimiter", required_argument, NULL, 'd'}, + {"only-delimited", no_argument, NULL, 's'}, + {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION}, + {"complement", no_argument, NULL, COMPLEMENT_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {NULL, 0, NULL, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s OPTION... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Print selected parts of lines from each FILE to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --bytes=LIST select only these bytes\n\ + -c, --characters=LIST select only these characters\n\ + -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ +"), stdout); + fputs (_("\ + -f, --fields=LIST select only these fields; also print any line\n\ + that contains no delimiter character, unless\n\ + the -s option is specified\n\ + -n (ignored)\n\ +"), stdout); + fputs (_("\ + --complement complement the set of selected bytes, characters\n\ + or fields\n\ +"), stdout); + fputs (_("\ + -s, --only-delimited do not print lines not containing delimiters\n\ + --output-delimiter=STRING use STRING as the output delimiter\n\ + the default is to use the input delimiter\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ +range, or many ranges separated by commas. 
Selected input is written\n\ +in the same order that it is read, and is written exactly once.\n\ +"), stdout); + fputs (_("\ +Each range is one of:\n\ +\n\ + N N'th byte, character or field, counted from 1\n\ + N- from N'th byte, character or field, to end of line\n\ + N-M from N'th to M'th (included) byte, character or field\n\ + -M from first to M'th (included) byte, character or field\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Comparison function for qsort: order struct range_pairs by LO, returning -1, 0 or 1. + LO is compared as size_t; narrowing it to int would truncate large positions. */ +static int +compare_ranges (const void *a, const void *b) +{ + size_t a_start = ((const struct range_pair *) a)->lo; + size_t b_start = ((const struct range_pair *) b)->lo; + return a_start < b_start ? -1 : a_start > b_start; +} + +/* Reallocate Range Pair entries, with corresponding + entries outside the range of each specified entry. */ + +static void +complement_rp (void) +{ + if (complement) + { + struct range_pair *c = rp; + size_t n = n_rp; + size_t i; + + rp = NULL; + n_rp = 0; + n_rp_allocated = 0; + + if (c[0].lo > 1) + add_range_pair (1, c[0].lo - 1); + + for (i = 1; i < n; ++i) + { + if (c[i-1].hi + 1 == c[i].lo) + continue; + + add_range_pair (c[i-1].hi + 1, c[i].lo - 1); + } + + if (c[n-1].hi < SIZE_MAX) + add_range_pair (c[n-1].hi + 1, SIZE_MAX); + + free (c); + } +} + +/* Given the list of field or byte range specifications FIELDSTR, + allocate and initialize the RP array. FIELDSTR should + be composed of one or more numbers or ranges of numbers, separated + by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; + 'n-' means 'n' through end of line. + Return true if FIELDSTR contains at least one field specification, + false otherwise. */ + +static bool +set_fields (const char *fieldstr) +{ + size_t initial = 1; /* Value of first number in a range. */ + size_t value = 0; /* If nonzero, a number being accumulated. 
*/ + bool lhs_specified = false; + bool rhs_specified = false; + bool dash_found = false; /* True if a '-' is found in this field. */ + bool field_found = false; /* True if at least one field spec + has been processed. */ + + size_t i; + bool in_digits = false; + + /* Collect and store in RP the range end points. */ + + while (true) + { + if (*fieldstr == '-') + { + in_digits = false; + /* Starting a range. */ + if (dash_found) + FATAL_ERROR (_("invalid byte, character or field list")); + dash_found = true; + fieldstr++; + + if (lhs_specified && !value) + FATAL_ERROR (_("fields and positions are numbered from 1")); + + initial = (lhs_specified ? value : 1); + value = 0; + } + else if (*fieldstr == ',' + || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') + { + in_digits = false; + /* Ending the string, or this field/byte sublist. */ + if (dash_found) + { + dash_found = false; + + if (!lhs_specified && !rhs_specified) + FATAL_ERROR (_("invalid range with no endpoint: -")); + + /* A range. Possibilities: -n, m-n, n-. + In any case, 'initial' contains the start of the range. */ + if (!rhs_specified) + { + /* 'n-'. From 'initial' to end of line. */ + add_range_pair (initial, SIZE_MAX); + field_found = true; + } + else + { + /* 'm-n' or '-n' (1-n). */ + if (value < initial) + FATAL_ERROR (_("invalid decreasing range")); + + add_range_pair (initial, value); + field_found = true; + } + value = 0; + } + else + { + /* A simple field number, not a range. */ + if (value == 0) + FATAL_ERROR (_("fields and positions are numbered from 1")); + add_range_pair (value, value); + value = 0; + field_found = true; + } + + if (*fieldstr == '\0') + break; + + fieldstr++; + lhs_specified = false; + rhs_specified = false; + } + else if (ISDIGIT (*fieldstr)) + { + /* Record beginning of digit string, in case we have to + complain about it. 
*/ + static char const *num_start; + if (!in_digits || !num_start) + num_start = fieldstr; + in_digits = true; + + if (dash_found) + rhs_specified = 1; + else + lhs_specified = 1; + + /* Detect overflow. */ + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t) + || value == SIZE_MAX) + { + /* In case the user specified -c$(echo 2^64|bc),22, + complain only about the first number. */ + /* Determine the length of the offending number. */ + size_t len = strspn (num_start, "0123456789"); + char *bad_num = xstrndup (num_start, len); + if (operating_mode == byte_mode) + error (0, 0, + _("byte offset %s is too large"), quote (bad_num)); + else + error (0, 0, + _("field number %s is too large"), quote (bad_num)); + free (bad_num); + exit (EXIT_FAILURE); + } + + fieldstr++; + } + else + FATAL_ERROR (_("invalid byte, character or field list")); + } + + qsort (rp, n_rp, sizeof (rp[0]), compare_ranges); + + /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ + for (i = 0; i < n_rp; ++i) + { + for (size_t j = i + 1; j < n_rp; ++j) + { + if (rp[j].lo <= rp[i].hi) + { + rp[i].hi = MAX (rp[j].hi, rp[i].hi); + memmove (rp + j, rp + j + 1, (n_rp - j - 1) * sizeof *rp); + n_rp--; + j--; + } + else + break; + } + } + + complement_rp (); + + /* After merging, reallocate RP so we release memory to the system. + Also add a sentinel at the end of RP, to avoid out of bounds access + and for performance reasons. */ + ++n_rp; + rp = xrealloc (rp, n_rp * sizeof (struct range_pair)); + rp[n_rp - 1].lo = rp[n_rp - 1].hi = SIZE_MAX; + + return field_found; +} + +/* Increment *ITEM_IDX (i.e., a field or byte index), + and if required CURRENT_RP. */ + +static inline void +next_item (size_t *item_idx) +{ + (*item_idx)++; + if ((*item_idx) > current_rp->hi) + current_rp++; +} + +/* Return nonzero if the K'th field or byte is printable. */ + +static inline bool +print_kth (size_t k) +{ + return current_rp->lo <= k; +} + +/* Return nonzero if K'th byte is the beginning of a range. 
*/ + +static inline bool +is_range_start_index (size_t k) +{ + return k == current_rp->lo; +} + +/* Read from stream STREAM, printing to standard output any selected bytes. */ + +static void +cut_bytes (FILE *stream) +{ + size_t byte_idx; /* Number of bytes in the line so far. */ + /* Whether to begin printing delimiters between ranges for the current line. + Set after we've begun printing data corresponding to the first range. */ + bool print_delimiter; + + byte_idx = 0; + print_delimiter = false; + current_rp = rp; + while (true) + { + int c; /* Each character from the file. */ + + c = getc (stream); + + if (c == '\n') + { + putchar ('\n'); + byte_idx = 0; + print_delimiter = false; + current_rp = rp; + } + else if (c == EOF) + { + if (byte_idx > 0) + putchar ('\n'); + break; + } + else + { + next_item (&byte_idx); + if (print_kth (byte_idx)) + { + if (output_delimiter_specified) + { + if (print_delimiter && is_range_start_index (byte_idx)) + { + fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout); + } + print_delimiter = true; + } + + putchar (c); + } + } + } +} + +/* Read from stream STREAM, printing to standard output any selected fields. */ + +static void +cut_fields (FILE *stream) +{ + int c; + size_t field_idx = 1; + bool found_any_selected_field = false; + bool buffer_first_field; + + current_rp = rp; + + c = getc (stream); + if (c == EOF) + return; + + ungetc (c, stream); + c = 0; + + /* To support the semantics of the -s flag, we may have to buffer + all of the first field to determine whether it is 'delimited.' + But that is unnecessary if all non-delimited lines must be printed + and the first field has been selected, or if non-delimited lines + must be suppressed and the first field has *not* been selected. + That is because a non-delimited line has exactly one field. 
*/ + buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); + + while (1) + { + if (field_idx == 1 && buffer_first_field) + { + ssize_t len; + size_t n_bytes; + + len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, + GETNLINE_NO_LIMIT, delim, '\n', stream); + if (len < 0) + { + free (field_1_buffer); + field_1_buffer = NULL; + if (ferror (stream) || feof (stream)) + break; + xalloc_die (); + } + + n_bytes = len; + assert (n_bytes != 0); + + c = 0; + + /* If the first field extends to the end of line (it is not + delimited) and we are printing all non-delimited lines, + print this one. */ + if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) + { + if (suppress_non_delimited) + { + /* Empty. */ + } + else + { + fwrite (field_1_buffer, sizeof (char), n_bytes, stdout); + /* Make sure the output line is newline terminated. */ + if (field_1_buffer[n_bytes - 1] != '\n') + putchar ('\n'); + c = '\n'; + } + continue; + } + if (print_kth (1)) + { + /* Print the field, but not the trailing delimiter. */ + fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout); + + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ + if (delim == '\n') + { + int last_c = getc (stream); + if (last_c != EOF) + { + ungetc (last_c, stream); + found_any_selected_field = true; + } + } + else + found_any_selected_field = true; + } + next_item (&field_idx); + } + + int prev_c = c; + + if (print_kth (field_idx)) + { + if (found_any_selected_field) + { + fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout); + } + found_any_selected_field = true; + + while ((c = getc (stream)) != delim && c != '\n' && c != EOF) + { + putchar (c); + prev_c = c; + } + } + else + { + while ((c = getc (stream)) != delim && c != '\n' && c != EOF) + { + prev_c = c; + } + } + + /* With -d$'\n' don't treat the last '\n' as a delimiter. 
*/ + if (delim == '\n' && c == delim) + { + int last_c = getc (stream); + if (last_c != EOF) + ungetc (last_c, stream); + else + c = last_c; + } + + if (c == delim) + next_item (&field_idx); + else if (c == '\n' || c == EOF) + { + if (found_any_selected_field + || !(suppress_non_delimited && field_idx == 1)) + { + if (c == '\n' || prev_c != '\n' || delim == '\n') + putchar ('\n'); + } + if (c == EOF) + break; + field_idx = 1; + current_rp = rp; + found_any_selected_field = false; + } + } +} + +static void +cut_stream (FILE *stream) +{ + if (operating_mode == byte_mode) + cut_bytes (stream); + else + cut_fields (stream); +} + +/* Process file FILE to standard output. + Return true if successful. */ + +static bool +cut_file (char const *file) +{ + FILE *stream; + + if (STREQ (file, "-")) + { + have_read_stdin = true; + stream = stdin; + } + else + { + stream = fopen (file, "r"); + if (stream == NULL) + { + error (0, errno, "%s", file); + return false; + } + } + + fadvise (stream, FADVISE_SEQUENTIAL); + + cut_stream (stream); + + if (ferror (stream)) + { + error (0, errno, "%s", file); + return false; + } + if (STREQ (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + { + error (0, errno, "%s", file); + return false; + } + return true; +} + +int +main (int argc, char **argv) +{ + int optc; + bool ok; + bool delim_specified = false; + char *spec_list_string IF_LINT ( = NULL); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + operating_mode = undefined_mode; + + /* By default, all non-delimited lines are printed. */ + suppress_non_delimited = false; + + delim = '\0'; + have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1) + { + switch (optc) + { + case 'b': + case 'c': + /* Build the byte list. 
*/ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); + operating_mode = byte_mode; + spec_list_string = optarg; + break; + + case 'f': + /* Build the field list. */ + if (operating_mode != undefined_mode) + FATAL_ERROR (_("only one type of list may be specified")); + operating_mode = field_mode; + spec_list_string = optarg; + break; + + case 'd': + /* New delimiter. */ + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ + if (optarg[0] != '\0' && optarg[1] != '\0') + FATAL_ERROR (_("the delimiter must be a single character")); + delim = optarg[0]; + delim_specified = true; + break; + + case OUTPUT_DELIMITER_OPTION: + output_delimiter_specified = true; + /* Interpret --output-delimiter='' to mean + 'use the NUL byte as the delimiter.' */ + output_delimiter_length = (optarg[0] == '\0' + ? 1 : strlen (optarg)); + output_delimiter_string = xstrdup (optarg); + break; + + case 'n': + break; + + case 's': + suppress_non_delimited = true; + break; + + case COMPLEMENT_OPTION: + complement = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (operating_mode == undefined_mode) + FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); + + if (delim_specified && operating_mode != field_mode) + FATAL_ERROR (_("an input delimiter may be specified only\ + when operating on fields")); + + if (suppress_non_delimited && operating_mode != field_mode) + FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ +\tonly when operating on fields")); + + if (! 
set_fields (spec_list_string)) + { + if (operating_mode == field_mode) + FATAL_ERROR (_("missing list of fields")); + else + FATAL_ERROR (_("missing list of positions")); + } + + if (!delim_specified) + delim = '\t'; + + if (output_delimiter_string == NULL) + { + static char dummy[2]; + dummy[0] = delim; + dummy[1] = '\0'; + output_delimiter_string = dummy; + output_delimiter_length = 1; + } + + if (optind == argc) + ok = cut_file ("-"); + else + for (ok = true; optind < argc; optind++) + ok &= cut_file (argv[optind]); + + + if (have_read_stdin && fclose (stdin) == EOF) + { + error (0, errno, "-"); + ok = false; + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__heirloom.2012-05-20 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__heirloom.2012-05-20 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,405 @@ +/* + * cut - cut out fields of lines of files + * + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05"; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iblok.h" + +#if defined (__GLIBC__) && defined (_IO_putc_unlocked) +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif + +struct range { + struct range *r_nxt; + long r_min; + long r_max; +}; + +static unsigned errcnt; /* count of errors */ +static int method; /* one of b, c, f */ +static int nflag; /* character boundary bytes */ +static int sflag; /* suppress lines w/o delimiters */ +static char *progname; /* argv[0] to main() */ +static wchar_t wcdelim = '\t'; /* delimiter character */ +static const char *mbdelim = "\t";/* delimiter character */ +struct range *fields; /* range list */ +static int multibyte; /* multibyte LC_CTYPE */ + +#define next(wc, s) (multibyte ? mbtowc(&(wc), s, MB_LEN_MAX) :\ + ((wc) = *(s) & 0377, (wc) != 0)) + +void * +lrealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) { + write(2, "line too long\n", 14); + exit(076); + } + return p; +} + +void * +smalloc(size_t nbytes) +{ + void *p; + + if ((p = malloc(nbytes)) == NULL) { + write(2, "no memory\n", 11); + exit(077); + } + return p; +} + +static void +error(const char *s) +{ + fprintf(stderr, "%s: ERROR: %s\n", progname, s); + exit(2); +} + +static void +usage(void) +{ + error("Usage: cut [-s] [-d] {-c | -f} file ..."); +} + +static void +badlist(void) +{ + error(method == 'b' ? 
"bad list for b/c/f option" : + "bad list for c/f option"); +} + +static void +setdelim(const char *s) +{ + int n; + + if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0')) + error("no delimiter"); + mbdelim = s; +} + +static void +addrange(long m, long n) +{ + struct range *rp, *rq; + + rp = smalloc(sizeof *rp); + rp->r_nxt = NULL; + rp->r_min = m; + rp->r_max = n ? n : m; + if (fields) { + for (rq = fields; rq->r_nxt; rq = rq->r_nxt); + rq->r_nxt = rp; + } else + fields = rp; +} + +static int +have(long i) +{ + struct range *rp; + + for (rp = fields; rp; rp = rp->r_nxt) + if (i >= rp->r_min && i <= rp->r_max) + return 1; + return 0; +} + +#define mnreset() m = 0, n = 0, lp = &m + +static void +setlist(const char *s) +{ + char *cbuf, *cp; + long m, n; + long *lp; + + fields = NULL; + cbuf = smalloc(strlen(s) + 1); + mnreset(); + for (;;) { + if (*s == '-') { + if (m == 0) + m = 1; + n = LONG_MAX; + lp = &n; + s++; + } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') { + if (m) + addrange(m, n); + mnreset(); + if (*s == '\0') + break; + s++; + } else if (isdigit(*s & 0377)) { + cp = cbuf; + do + *cp++ = *s++; + while (isdigit(*s & 0377)); + *cp = '\0'; + *lp = strtol(cbuf, NULL, 10); + } else + badlist(); + } + if (fields == NULL) + error("no fields"); + free(cbuf); +} + +static void +cutb(struct iblok *ip) +{ + int c, i; + + i = 1; + while ((c = ib_get(ip)) != EOF) { + if (c == '\n') { + i = 1; + putc(c, stdout); + } else if (have(i++)) + putc(c, stdout); + } +} + +static void +cutbn(struct iblok *ip) +{ + char *cp; + int i, m, n; + wint_t wc; + + i = 1; + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { + if (wc == '\n') { + i = 1; + putc('\n', stdout); + } else { + if (have(i + n - 1)) + for (m = 0; m < n; m++) + putc(cp[m], stdout); + i += n; + } + } +} + +static void +cutc(struct iblok *ip) +{ + char *cp; + int i, n, m; + wint_t wc; + + i = 1; + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { + if (wc == '\n') { + i = 1; + putc('\n', stdout); + } 
else if (wc != WEOF && have(i++)) { + for (m = 0; m < n; m++) + putc(cp[m], stdout); + } + } +} + +static void +cutf(struct iblok *ip) +{ + static char *line; + static size_t linesize; + char *cp, *lp, *lq; + int c, i, n, m, gotcha; + char b; + wint_t wc; + const int incr = 128; + + if (linesize == 0) + line = smalloc(linesize = incr); + lp = line; + gotcha = 0; + i = 1; + do { + if (multibyte) + cp = ib_getw(ip, &wc, &n); + else { + if ((c = ib_get(ip)) != EOF) { + wc = c; + b = (char)c; + cp = &b; + } else { + wc = WEOF; + cp = NULL; + } + n = 1; + } + if (cp == NULL || wc == '\n' || wc == wcdelim) { + if (have(i) && (!sflag || gotcha || wc == wcdelim) || + (!sflag && i == 1 && + (cp == NULL || wc == '\n'))) { + if (gotcha) + for (m = 0; mbdelim[m]; m++) + putc(mbdelim[m], stdout); + for (lq = line; lq < lp; lq++) + putc(*lq, stdout); + gotcha = 1; + } + if (wc == '\n') { + if (gotcha) + putc('\n', stdout); + i = 1; + gotcha = 0; + } else + i++; + lp = line; + } else { + for (m = 0; m < n; m++) { + if (lp >= &line[linesize]) { + size_t diff = lp - line; + line = lrealloc(line, linesize += incr); + lp = &line[diff]; + } + *lp++ = cp[m]; + } + } + } while (cp != NULL); +} + +static int +fdcut(int fd) +{ + struct iblok *ip; + + ip = ib_alloc(fd, 0); + switch (method) { + case 'b': + if (nflag && multibyte) + cutbn(ip); + else + cutb(ip); + break; + case 'c': + if (multibyte) + cutc(ip); + else + cutb(ip); + break; + case 'f': + cutf(ip); + break; + } + ib_free(ip); + return 0; +} + +static int +fncut(const char *fn) +{ + int fd, res; + + if (fn[0] == '-' && fn[1] == '\0') + fd = 0; + else if ((fd = open(fn, O_RDONLY)) < 0) { + fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn); + return 1; + } + res = fdcut(fd); + if (fd) + close(fd); + return res; +} + +int +main(int argc, char **argv) +{ + const char optstring[] = "b:c:d:f:ns"; + int i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + multibyte = MB_CUR_MAX > 1; +#ifdef __GLIBC__ + 
putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'b': + case 'c': + case 'f': + if (method && method != i) + usage(); + method = i; + setlist(optarg); + break; + case 'd': + setdelim(optarg); + break; + case 'n': + nflag = 1; + break; + case 's': + sflag = 1; + break; + default: + usage(); + } + } + /*if ((sflag && method != 'f') || (nflag && method != 'b')) + usage();*/ + if (method == 0) + badlist(); + if (argv[optind]) { + for (i = optind; argv[i]; i++) + errcnt |= fncut(argv[i]); + } else + errcnt |= fdcut(0); + return errcnt; +} diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__netbsd.1993-03-21 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__netbsd.1993-03-21 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,270 @@ +/* + * Copyright (c) 1989 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +char copyright[] = +"@(#) Copyright (c) 1989 The Regents of the University of California.\n\ + All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)cut.c 5.4 (Berkeley) 10/30/90"; +#endif /* not lint */ + +#include +#include +#include + +int cflag; +char dchar; +int dflag; +int fflag; +int sflag; + +main(argc, argv) + int argc; + char **argv; +{ + extern char *optarg; + extern int errno, optind; + FILE *fp; + int ch, (*fcn)(), c_cut(), f_cut(); + char *strerror(); + + dchar = '\t'; /* default delimiter is \t */ + + while ((ch = getopt(argc, argv, "c:d:f:s")) != EOF) + switch(ch) { + case 'c': + fcn = c_cut; + get_list(optarg); + cflag = 1; + break; + case 'd': + dchar = *optarg; + dflag = 1; + break; + case 'f': + get_list(optarg); + fcn = f_cut; + fflag = 1; + break; + case 's': + sflag = 1; + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (fflag) { + if (cflag) + usage(); + } else if (!cflag || dflag || sflag) + usage(); + + if (*argv) + for (; *argv; ++argv) { + if (!(fp = fopen(*argv, "r"))) { + (void)fprintf(stderr, + "cut: %s: %s\n", *argv, strerror(errno)); + exit(1); + } + fcn(fp, 
*argv); + } + else + fcn(stdin, "stdin"); + exit(0); +} + +int autostart, autostop, maxval; + +char positions[_POSIX2_LINE_MAX + 1]; + +get_list(list) + char *list; +{ + register char *pos; + register int setautostart, start, stop; + char *p, *strtok(); + + /* + * set a byte in the positions array to indicate if a field or + * column is to be selected; use +1, it's 1-based, not 0-based. + * This parser is less restrictive than the Draft 9 POSIX spec. + * POSIX doesn't allow lists that aren't in increasing order or + * overlapping lists. We also handle "-3-5" although there's no + * real reason too. + */ + for (; p = strtok(list, ", \t"); list = NULL) { + setautostart = start = stop = 0; + if (*p == '-') { + ++p; + setautostart = 1; + } + if (isdigit(*p)) { + start = stop = strtol(p, &p, 10); + if (setautostart && start > autostart) + autostart = start; + } + if (*p == '-') { + if (isdigit(p[1])) + stop = strtol(p + 1, &p, 10); + if (*p == '-') { + ++p; + if (!autostop || autostop > stop) + autostop = stop; + } + } + if (*p) + badlist("illegal list value"); + if (!stop || !start) + badlist("values may not include zero"); + if (stop > _POSIX2_LINE_MAX) { + /* positions used rather than allocate a new buffer */ + (void)sprintf(positions, "%d too large (max %d)", + stop, _POSIX2_LINE_MAX); + badlist(positions); + } + if (maxval < stop) + maxval = stop; + for (pos = positions + start; start++ <= stop; *pos++ = 1); + } + + /* overlapping ranges */ + if (autostop && maxval > autostop) + maxval = autostop; + + /* set autostart */ + if (autostart) + memset(positions + 1, '1', autostart); +} + +/* ARGSUSED */ +c_cut(fp, fname) + FILE *fp; + char *fname; +{ + register int ch, col; + register char *pos; + + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = getc(fp)) == EOF) + return; + if (ch == '\n') + break; + if (*pos++) + putchar(ch); + } + if (ch != '\n') + if (autostop) + while ((ch = getc(fp)) != EOF && ch != '\n') + putchar(ch); + else + 
while ((ch = getc(fp)) != EOF && ch != '\n'); + putchar('\n'); + } +} + +f_cut(fp, fname) + FILE *fp; + char *fname; +{ + register int ch, field, isdelim; + register char *pos, *p, sep; + int output; + char lbuf[_POSIX2_LINE_MAX + 1]; + + for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp);) { + for (isdelim = 0, p = lbuf;; ++p) { + if (!(ch = *p)) { + (void)fprintf(stderr, + "cut: %s: line too long.\n", fname); + exit(1); + } + /* this should work if newline is delimiter */ + if (ch == sep) + isdelim = 1; + if (ch == '\n') { + if (!isdelim && !sflag) + (void)printf("%s", lbuf); + break; + } + } + if (!isdelim) + continue; + + pos = positions + 1; + for (field = maxval, p = lbuf; field; --field, ++pos) { + if (*pos) { + if (output++) + putchar(sep); + while ((ch = *p++) != '\n' && ch != sep) + putchar(ch); + } else + while ((ch = *p++) != '\n' && ch != sep); + if (ch == '\n') + break; + } + if (ch != '\n') + if (autostop) { + if (output) + putchar(sep); + for (; (ch = *p) != '\n'; ++p) + putchar(ch); + } else + for (; (ch = *p) != '\n'; ++p); + putchar('\n'); + } +} + +badlist(msg) + char *msg; +{ + (void)fprintf(stderr, "cut: [-cf] list: %s.\n", msg); + exit(1); +} + +usage() +{ + (void)fprintf(stderr, +"usage:\tcut -c list [file1 ...]\n\tcut -f list [-s] [-d delim] [file ...]\n"); + exit(1); +} diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__netbsd.2014-02-03 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__netbsd.2014-02-03 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,306 @@ +/* $NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. 
All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; +#endif +__RCSID("$NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $"); +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int bflag; +static int cflag; +static char dchar; +static int dflag; +static int fflag; +static int sflag; + +static void b_cut(FILE *, const char *); +static void c_cut(FILE *, const char *); +static void f_cut(FILE *, const char *); +static void get_list(char *); +static void usage(void) __dead; + +int +main(int argc, char *argv[]) +{ + FILE *fp; + void (*fcn)(FILE *, const char *); + int ch, rval; + + fcn = NULL; + (void)setlocale(LC_ALL, ""); + + dchar = '\t'; /* default delimiter is \t */ + + /* Since we don't support multi-byte characters, the -c and -b + options are equivalent, and the -n option is meaningless. */ + while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) + switch(ch) { + case 'b': + fcn = b_cut; + get_list(optarg); + bflag = 1; + break; + case 'c': + fcn = c_cut; + get_list(optarg); + cflag = 1; + break; + case 'd': + dchar = *optarg; + dflag = 1; + break; + case 'f': + get_list(optarg); + fcn = f_cut; + fflag = 1; + break; + case 's': + sflag = 1; + break; + case 'n': + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (fflag) { + if (cflag || bflag) + usage(); + } else if ((!cflag && !bflag) || dflag || sflag) + usage(); + else if (bflag && cflag) + usage(); + + rval = 0; + if (*argv) + for (; *argv; ++argv) { + if (strcmp(*argv, "-") == 0) + fcn(stdin, "stdin"); + else { + if ((fp = fopen(*argv, "r"))) { + fcn(fp, *argv); + (void)fclose(fp); + } else { + rval = 1; + warn("%s", *argv); + } + } + } + else + fcn(stdin, "stdin"); + return(rval); +} + +static size_t autostart, autostop, maxval; + +static char *positions = NULL; +static size_t numpositions = 
0; +#define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ + +static void +get_list(char *list) +{ + size_t setautostart, start, stop; + char *pos; + char *p; + + if (positions == NULL) { + numpositions = ALLOC_CHUNK; + positions = ecalloc(numpositions, sizeof(*positions)); + } + + /* + * set a byte in the positions array to indicate if a field or + * column is to be selected; use +1, it's 1-based, not 0-based. + * This parser is less restrictive than the Draft 9 POSIX spec. + * POSIX doesn't allow lists that aren't in increasing order or + * overlapping lists. We also handle "-3-5" although there's no + * real reason to. + */ + for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { + setautostart = start = stop = 0; + if (*p == '-') { + ++p; + setautostart = 1; + } + if (isdigit((unsigned char)*p)) { + start = stop = strtol(p, &p, 10); + if (setautostart && start > autostart) + autostart = start; + } + if (*p == '-') { + if (isdigit((unsigned char)p[1])) + stop = strtol(p + 1, &p, 10); + if (*p == '-') { + ++p; + if (!autostop || autostop > stop) + autostop = stop; + } + } + if (*p) + errx(1, "[-bcf] list: illegal list value"); + if (!stop || !start) + errx(1, "[-bcf] list: values may not include zero"); + if (stop + 1 > numpositions) { + size_t newsize; + newsize = roundup(stop + 1, ALLOC_CHUNK); + positions = erealloc(positions, newsize); + (void)memset(positions + numpositions, 0, + newsize - numpositions); + numpositions = newsize; + } + if (maxval < stop) + maxval = stop; + for (pos = positions + start; start++ <= stop; pos++) + *pos = 1; + } + + /* overlapping ranges */ + if (autostop && maxval > autostop) + maxval = autostop; + + /* set autostart */ + if (autostart) + (void)memset(positions + 1, '1', autostart); +} + +static void +/*ARGSUSED*/ +f_cut(FILE *fp, const char *fname __unused) +{ + int ch, field, isdelim; + char *pos, *p, sep; + int output; + size_t len; + char *lbuf, *tbuf; + + for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) 
!= NULL;) { + output = 0; + if (lbuf[len - 1] != '\n') { + /* no newline at the end of the last line so add one */ + if ((tbuf = (char *)malloc(len + 1)) == NULL) + err(1, NULL); + (void)memcpy(tbuf, lbuf, len); + tbuf[len++] = '\n'; + lbuf = tbuf; + } + for (isdelim = 0, p = lbuf;; ++p) { + ch = *p; + /* this should work if newline is delimiter */ + if (ch == sep) + isdelim = 1; + if (ch == '\n') { + if (!isdelim && !sflag) + (void)fwrite(lbuf, len, 1, stdout); + break; + } + } + if (!isdelim) + continue; + + pos = positions + 1; + for (field = maxval, p = lbuf; field; --field, ++pos) { + if (*pos) { + if (output++) + (void)putchar(sep); + while ((ch = *p++) != '\n' && ch != sep) + (void)putchar(ch); + } else { + while ((ch = *p++) != '\n' && ch != sep) + continue; + } + if (ch == '\n') + break; + } + if (ch != '\n') { + if (autostop) { + if (output) + (void)putchar(sep); + for (; (ch = *p) != '\n'; ++p) + (void)putchar(ch); + } else + for (; (ch = *p) != '\n'; ++p); + } + (void)putchar('\n'); + if (tbuf) { + free(tbuf); + tbuf = NULL; + } + } + if (tbuf) + free(tbuf); +} + +static void +usage(void) +{ + (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" + "\tcut -c list [file ...]\n" + "\tcut -f list [-d string] [-s] [file ...]\n"); + exit(1); +} + +/* make b_put(): */ +#define CUT_BYTE 1 +#include "x_cut.c" +#undef CUT_BYTE + +/* make c_put(): */ +#define CUT_BYTE 0 +#include "x_cut.c" +#undef CUT_BYTE diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__openbsd.2008-06-27 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__openbsd.2008-06-27 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,290 @@ +/* $OpenBSD: cut.c,v 1.13 2008/06/27 08:02:13 sobrado Exp $ */ +/* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. 
Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. 
All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; +#endif +static char rcsid[] = "$OpenBSD: cut.c,v 1.13 2008/06/27 08:02:13 sobrado Exp $"; +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int cflag; +char dchar; +int dflag; +int fflag; +int sflag; + +void c_cut(FILE *, char *); +void f_cut(FILE *, char *); +void get_list(char *); +void usage(void); + +int +main(int argc, char *argv[]) +{ + FILE *fp; + void (*fcn)(FILE *, char *); + int ch; + + setlocale (LC_ALL, ""); + + dchar = '\t'; /* default delimiter is \t */ + + /* Since we don't support multi-byte characters, the -c and -b + options are equivalent, and the -n option is meaningless. */ + while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) + switch(ch) { + case 'b': + case 'c': + fcn = c_cut; + get_list(optarg); + cflag = 1; + break; + case 'd': + dchar = *optarg; + dflag = 1; + break; + case 'f': + get_list(optarg); + fcn = f_cut; + fflag = 1; + break; + case 's': + sflag = 1; + break; + case 'n': + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (fflag) { + if (cflag) + usage(); + } else if (!cflag || dflag || sflag) + usage(); + + if (*argv) + for (; *argv; ++argv) { + if (!(fp = fopen(*argv, "r"))) + err(1, "%s", *argv); + fcn(fp, *argv); + (void)fclose(fp); + } + else + fcn(stdin, "stdin"); + exit(0); +} + +int autostart, autostop, maxval; + +char positions[_POSIX2_LINE_MAX + 1]; + +void +get_list(char *list) +{ + int setautostart, start, stop; + char *pos; + char *p; + + /* + * set a byte in the positions array to indicate if a field or + * column is to be selected; use +1, it's 1-based, not 0-based. + * This parser is less restrictive than the Draft 9 POSIX spec. + * POSIX doesn't allow lists that aren't in increasing order or + * overlapping lists. We also handle "-3-5" although there's no + * real reason too. 
+ */ + while ((p = strsep(&list, ", \t"))) { + setautostart = start = stop = 0; + if (*p == '-') { + ++p; + setautostart = 1; + } + if (isdigit(*p)) { + start = stop = strtol(p, &p, 10); + if (setautostart && start > autostart) + autostart = start; + } + if (*p == '-') { + if (isdigit(p[1])) + stop = strtol(p + 1, &p, 10); + if (*p == '-') { + ++p; + if (!autostop || autostop > stop) + autostop = stop; + } + } + if (*p) + errx(1, "[-cf] list: illegal list value"); + if (!stop || !start) + errx(1, "[-cf] list: values may not include zero"); + if (stop > _POSIX2_LINE_MAX) + errx(1, "[-cf] list: %d too large (max %d)", + stop, _POSIX2_LINE_MAX); + if (maxval < stop) + maxval = stop; + for (pos = positions + start; start++ <= stop; *pos++ = 1) + ; + } + + /* overlapping ranges */ + if (autostop && maxval > autostop) + maxval = autostop; + + /* set autostart */ + if (autostart) + memset(positions + 1, '1', autostart); +} + +/* ARGSUSED */ +void +c_cut(FILE *fp, char *fname) +{ + int ch, col; + char *pos; + + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = getc(fp)) == EOF) + return; + if (ch == '\n') + break; + if (*pos++) + (void)putchar(ch); + } + if (ch != '\n') { + if (autostop) + while ((ch = getc(fp)) != EOF && ch != '\n') + (void)putchar(ch); + else + while ((ch = getc(fp)) != EOF && ch != '\n') + ; + } + (void)putchar('\n'); + } +} + +void +f_cut(FILE *fp, char *fname) +{ + int ch, field, isdelim; + char *pos, *p, sep; + int output; + size_t len; + char *lbuf, *tbuf; + + for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len));) { + output = 0; + if (lbuf[len - 1] != '\n') { + /* no newline at the end of the last line so add one */ + if ((tbuf = (char *)malloc(len + 1)) == NULL) + err(1, NULL); + memcpy(tbuf, lbuf, len); + tbuf[len] = '\n'; + lbuf = tbuf; + } + for (isdelim = 0, p = lbuf;; ++p) { + ch = *p; + /* this should work if newline is delimiter */ + if (ch == sep) + isdelim = 1; + if (ch == '\n') { + if (!isdelim && 
!sflag) + (void)fwrite(lbuf, len, 1, stdout); + break; + } + } + if (!isdelim) + continue; + + pos = positions + 1; + for (field = maxval, p = lbuf; field; --field, ++pos) { + if (*pos) { + if (output++) + (void)putchar(sep); + while ((ch = *p++) != '\n' && ch != sep) + (void)putchar(ch); + } else + while ((ch = *p++) != '\n' && ch != sep) + ; + if (ch == '\n') + break; + } + if (ch != '\n') { + if (autostop) { + if (output) + (void)putchar(sep); + for (; (ch = *p) != '\n'; ++p) + (void)putchar(ch); + } else + for (; (ch = *p) != '\n'; ++p) + ; + } + (void)putchar('\n'); + } + if (tbuf) + free(tbuf); +} + +void +usage(void) +{ + (void)fprintf(stderr, + "usage: cut -b list [-n] [file ...]\n" + " cut -c list [file ...]\n" + " cut -f list [-s] [-d delim] [file ...]\n"); + exit(1); +} diff -r bf5e41260f89 -r 21ad1c1548c4 code/cut.c__system_iii.1980-04-11 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__system_iii.1980-04-11 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,123 @@ +# +/* cut : cut and paste columns of a table (projection of a relation) (GWRL) */ +/* Release 1.5; handles single backspaces as produced by nroff */ +# include /* make: cc cut.c */ +# define NFIELDS 512 /* max no of fields or resulting line length */ +# define BACKSPACE 8 +main(argc, argv) +int argc; char **argv; +{ + int del = '\t'; + int i, j, count, poscnt, r, s, t; + int endflag, supflag, cflag, fflag, backflag, filenr; + int sel[NFIELDS]; + register int c; + register char *p1; + char *p2, outbuf[NFIELDS]; + FILE *inptr; + + +while (argc > 1 && argv[1][0] == '-'){ + for (i = 1; (c = argv[1][i]) != '\0'; i++) { + switch(c) { + case 'd' : del = argv[1][++i]; + if (del == '\0') diag("no delimiter\n"); + break; + case 's': supflag++ ; + break; + case 'c': cflag++ ; + break; + case 'f': fflag++ ; + break; + default : diag("Usage: cut [-s] [-d] {-c | -f} file ...\n"); + break; + } + if (!endflag && (cflag || fflag)) { + endflag = 1; + r = s = t = 0; + do { c = argv[1][++i]; + switch(c) { 
+ case '-' : if (r) diagl(); + r = 1; + if (t == 0) s = 1; + else {s = t; t = 0;} + continue; + case '\0' : + case ',' : if (t >= NFIELDS) diagl(); + if (r) { if (t == 0) t = NFIELDS - 1; + if (t 0 ? 1 : 0); + r = s = t = 0; + if (c == '\0') {i--; break;} + continue; + default : + if (c< '0' || c> '9') diagl(); + t = 10*t + c - '0'; + continue; + } + for (j = t = 0; j < NFIELDS; j++) t += sel[j]; + if (t == 0) diag("no fields\n"); + } while (c != '\0'); + } + } + --argc; + ++argv; +} /* end options */ +if (!(cflag || fflag)) diagl(); + +--argc; +filenr = 1; +do { /* for all input files */ + if (argc > 0) inptr = fopen(argv[filenr], "r"); + else inptr = stdin; + + if (inptr == NULL) { + write(2,"Cannot open :",14); + diag(argv[filenr]); + } + endflag = 0; + do { /* for all lines of a file */ + count = poscnt = backflag = 0; + p1 = &outbuf[0] - 1 ; + p2 = p1; + do { /* for all char of the line */ + c = fgetc(inptr); + if (c == EOF) { + endflag = 1; + break; + } + if (count == NFIELDS - 1) diag("line too long\n"); + if (c != '\n') *++p1 = c; + if (cflag && (c == BACKSPACE)) backflag++ ; else + { if ( !backflag ) poscnt += 1 ; else backflag-- ;} + if ( backflag > 1 ) diag("cannot handle multiple adjacent backspaces\n"); + if ( ((c == '\n') && count > 0) || c == del || cflag) { + count += 1; + if (fflag) poscnt = count ; + if (sel[poscnt]) p2 = p1; else p1 = p2; + } + }while (c != '\n'); + if ( !endflag && (count > 0 || !supflag)) { + if (*p1 == del) *p1 = '\0'; + else *++p1 = '\0'; /*suppress trailing delimiter*/ + puts(outbuf); + } + } while (!endflag) ; +fclose(inptr); +} while(++filenr <= argc); +} + +diag(s) +char *s; +{ + write(2, "cut : ", 6); + while(*s) + write(2,s++,1); + exit(2); +} +diagl() +{ +diag("bad list for c/f option\n"); +}