meillo@14: /* meillo@14: * cut - cut out fields of lines of files meillo@14: * meillo@14: * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. meillo@14: */ meillo@14: /* meillo@14: * Copyright (c) 2003 Gunnar Ritter meillo@14: * meillo@14: * This software is provided 'as-is', without any express or implied meillo@14: * warranty. In no event will the authors be held liable for any damages meillo@14: * arising from the use of this software. meillo@14: * meillo@14: * Permission is granted to anyone to use this software for any purpose, meillo@14: * including commercial applications, and to alter it and redistribute meillo@14: * it freely, subject to the following restrictions: meillo@14: * meillo@14: * 1. The origin of this software must not be misrepresented; you must not meillo@14: * claim that you wrote the original software. If you use this software meillo@14: * in a product, an acknowledgment in the product documentation would be meillo@14: * appreciated but is not required. meillo@14: * meillo@14: * 2. Altered source versions must be plainly marked as such, and must not be meillo@14: * misrepresented as being the original software. meillo@14: * meillo@14: * 3. This notice may not be removed or altered from any source distribution. meillo@14: */ meillo@14: meillo@14: #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 meillo@14: #define USED __attribute__ ((used)) meillo@14: #elif defined __GNUC__ meillo@14: #define USED __attribute__ ((unused)) meillo@14: #else meillo@14: #define USED meillo@14: #endif meillo@14: static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05"; meillo@14: meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: #include meillo@14: meillo@14: #include "iblok.h" meillo@14: meillo@14: #if defined (__GLIBC__) && defined (_IO_putc_unlocked) meillo@14: #undef putc meillo@14: #define putc(c, f) _IO_putc_unlocked(c, f) meillo@14: #endif meillo@14: meillo@14: struct range { meillo@14: struct range *r_nxt; meillo@14: long r_min; meillo@14: long r_max; meillo@14: }; meillo@14: meillo@14: static unsigned errcnt; /* count of errors */ meillo@14: static int method; /* one of b, c, f */ meillo@14: static int nflag; /* character boundary bytes */ meillo@14: static int sflag; /* suppress lines w/o delimiters */ meillo@14: static char *progname; /* argv[0] to main() */ meillo@14: static wchar_t wcdelim = '\t'; /* delimiter character */ meillo@14: static const char *mbdelim = "\t";/* delimiter character */ meillo@14: struct range *fields; /* range list */ meillo@14: static int multibyte; /* multibyte LC_CTYPE */ meillo@14: meillo@14: #define next(wc, s) (multibyte ? mbtowc(&(wc), s, MB_LEN_MAX) :\ meillo@14: ((wc) = *(s) & 0377, (wc) != 0)) meillo@14: meillo@14: void * meillo@14: lrealloc(void *vp, size_t nbytes) meillo@14: { meillo@14: void *p; meillo@14: meillo@14: if ((p = realloc(vp, nbytes)) == NULL) { meillo@14: write(2, "line too long\n", 14); meillo@14: exit(076); meillo@14: } meillo@14: return p; meillo@14: } meillo@14: meillo@14: void * meillo@14: smalloc(size_t nbytes) meillo@14: { meillo@14: void *p; meillo@14: meillo@14: if ((p = malloc(nbytes)) == NULL) { meillo@14: write(2, "no memory\n", 11); meillo@14: exit(077); meillo@14: } meillo@14: return p; meillo@14: } meillo@14: meillo@14: static void meillo@14: error(const char *s) meillo@14: { meillo@14: fprintf(stderr, "%s: ERROR: %s\n", progname, s); meillo@14: exit(2); meillo@14: } meillo@14: meillo@14: static void meillo@14: usage(void) meillo@14: { meillo@14: error("Usage: cut [-s] [-d] {-c | -f} file ..."); meillo@14: } meillo@14: meillo@14: static void meillo@14: badlist(void) meillo@14: { meillo@14: error(method == 'b' ? "bad list for b/c/f option" : meillo@14: "bad list for c/f option"); meillo@14: } meillo@14: meillo@14: static void meillo@14: setdelim(const char *s) meillo@14: { meillo@14: int n; meillo@14: meillo@14: if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0')) meillo@14: error("no delimiter"); meillo@14: mbdelim = s; meillo@14: } meillo@14: meillo@14: static void meillo@14: addrange(long m, long n) meillo@14: { meillo@14: struct range *rp, *rq; meillo@14: meillo@14: rp = smalloc(sizeof *rp); meillo@14: rp->r_nxt = NULL; meillo@14: rp->r_min = m; meillo@14: rp->r_max = n ? n : m; meillo@14: if (fields) { meillo@14: for (rq = fields; rq->r_nxt; rq = rq->r_nxt); meillo@14: rq->r_nxt = rp; meillo@14: } else meillo@14: fields = rp; meillo@14: } meillo@14: meillo@14: static int meillo@14: have(long i) meillo@14: { meillo@14: struct range *rp; meillo@14: meillo@14: for (rp = fields; rp; rp = rp->r_nxt) meillo@14: if (i >= rp->r_min && i <= rp->r_max) meillo@14: return 1; meillo@14: return 0; meillo@14: } meillo@14: meillo@14: #define mnreset() m = 0, n = 0, lp = &m meillo@14: meillo@14: static void meillo@14: setlist(const char *s) meillo@14: { meillo@14: char *cbuf, *cp; meillo@14: long m, n; meillo@14: long *lp; meillo@14: meillo@14: fields = NULL; meillo@14: cbuf = smalloc(strlen(s) + 1); meillo@14: mnreset(); meillo@14: for (;;) { meillo@14: if (*s == '-') { meillo@14: if (m == 0) meillo@14: m = 1; meillo@14: n = LONG_MAX; meillo@14: lp = &n; meillo@14: s++; meillo@14: } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') { meillo@14: if (m) meillo@14: addrange(m, n); meillo@14: mnreset(); meillo@14: if (*s == '\0') meillo@14: break; meillo@14: s++; meillo@14: } else if (isdigit(*s & 0377)) { meillo@14: cp = cbuf; meillo@14: do meillo@14: *cp++ = *s++; meillo@14: while (isdigit(*s & 0377)); meillo@14: *cp = '\0'; meillo@14: *lp = strtol(cbuf, NULL, 10); meillo@14: } else meillo@14: badlist(); meillo@14: } meillo@14: if (fields == NULL) meillo@14: error("no fields"); meillo@14: free(cbuf); meillo@14: } meillo@14: meillo@14: static void meillo@14: cutb(struct iblok *ip) meillo@14: { meillo@14: int c, i; meillo@14: meillo@14: i = 1; meillo@14: while ((c = ib_get(ip)) != EOF) { meillo@14: if (c == '\n') { meillo@14: i = 1; meillo@14: putc(c, stdout); meillo@14: } else if (have(i++)) meillo@14: putc(c, stdout); meillo@14: } meillo@14: } meillo@14: meillo@14: static void meillo@14: cutbn(struct iblok *ip) meillo@14: { meillo@14: char *cp; meillo@14: int i, m, n; meillo@14: wint_t wc; meillo@14: meillo@14: i = 1; meillo@14: while ((cp = ib_getw(ip, &wc, &n)) != NULL) { meillo@14: if (wc == '\n') { meillo@14: i = 1; meillo@14: putc('\n', stdout); meillo@14: } else { meillo@14: if (have(i + n - 1)) meillo@14: for (m = 0; m < n; m++) meillo@14: putc(cp[m], stdout); meillo@14: i += n; meillo@14: } meillo@14: } meillo@14: } meillo@14: meillo@14: static void meillo@14: cutc(struct iblok *ip) meillo@14: { meillo@14: char *cp; meillo@14: int i, n, m; meillo@14: wint_t wc; meillo@14: meillo@14: i = 1; meillo@14: while ((cp = ib_getw(ip, &wc, &n)) != NULL) { meillo@14: if (wc == '\n') { meillo@14: i = 1; meillo@14: putc('\n', stdout); meillo@14: } else if (wc != WEOF && have(i++)) { meillo@14: for (m = 0; m < n; m++) meillo@14: putc(cp[m], stdout); meillo@14: } meillo@14: } meillo@14: } meillo@14: meillo@14: static void meillo@14: cutf(struct iblok *ip) meillo@14: { meillo@14: static char *line; meillo@14: static size_t linesize; meillo@14: char *cp, *lp, *lq; meillo@14: int c, i, n, m, gotcha; meillo@14: char b; meillo@14: wint_t wc; meillo@14: const int incr = 128; meillo@14: meillo@14: if (linesize == 0) meillo@14: line = smalloc(linesize = incr); meillo@14: lp = line; meillo@14: gotcha = 0; meillo@14: i = 1; meillo@14: do { meillo@14: if (multibyte) meillo@14: cp = ib_getw(ip, &wc, &n); meillo@14: else { meillo@14: if ((c = ib_get(ip)) != EOF) { meillo@14: wc = c; meillo@14: b = (char)c; meillo@14: cp = &b; meillo@14: } else { meillo@14: wc = WEOF; meillo@14: cp = NULL; meillo@14: } meillo@14: n = 1; meillo@14: } meillo@14: if (cp == NULL || wc == '\n' || wc == wcdelim) { meillo@14: if (have(i) && (!sflag || gotcha || wc == wcdelim) || meillo@14: (!sflag && i == 1 && meillo@14: (cp == NULL || wc == '\n'))) { meillo@14: if (gotcha) meillo@14: for (m = 0; mbdelim[m]; m++) meillo@14: putc(mbdelim[m], stdout); meillo@14: for (lq = line; lq < lp; lq++) meillo@14: putc(*lq, stdout); meillo@14: gotcha = 1; meillo@14: } meillo@14: if (wc == '\n') { meillo@14: if (gotcha) meillo@14: putc('\n', stdout); meillo@14: i = 1; meillo@14: gotcha = 0; meillo@14: } else meillo@14: i++; meillo@14: lp = line; meillo@14: } else { meillo@14: for (m = 0; m < n; m++) { meillo@14: if (lp >= &line[linesize]) { meillo@14: size_t diff = lp - line; meillo@14: line = lrealloc(line, linesize += incr); meillo@14: lp = &line[diff]; meillo@14: } meillo@14: *lp++ = cp[m]; meillo@14: } meillo@14: } meillo@14: } while (cp != NULL); meillo@14: } meillo@14: meillo@14: static int meillo@14: fdcut(int fd) meillo@14: { meillo@14: struct iblok *ip; meillo@14: meillo@14: ip = ib_alloc(fd, 0); meillo@14: switch (method) { meillo@14: case 'b': meillo@14: if (nflag && multibyte) meillo@14: cutbn(ip); meillo@14: else meillo@14: cutb(ip); meillo@14: break; meillo@14: case 'c': meillo@14: if (multibyte) meillo@14: cutc(ip); meillo@14: else meillo@14: cutb(ip); meillo@14: break; meillo@14: case 'f': meillo@14: cutf(ip); meillo@14: break; meillo@14: } meillo@14: ib_free(ip); meillo@14: return 0; meillo@14: } meillo@14: meillo@14: static int meillo@14: fncut(const char *fn) meillo@14: { meillo@14: int fd, res; meillo@14: meillo@14: if (fn[0] == '-' && fn[1] == '\0') meillo@14: fd = 0; meillo@14: else if ((fd = open(fn, O_RDONLY)) < 0) { meillo@14: fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn); meillo@14: return 1; meillo@14: } meillo@14: res = fdcut(fd); meillo@14: if (fd) meillo@14: close(fd); meillo@14: return res; meillo@14: } meillo@14: meillo@14: int meillo@14: main(int argc, char **argv) meillo@14: { meillo@14: const char optstring[] = "b:c:d:f:ns"; meillo@14: int i; meillo@14: meillo@14: progname = basename(argv[0]); meillo@14: setlocale(LC_CTYPE, ""); meillo@14: multibyte = MB_CUR_MAX > 1; meillo@14: #ifdef __GLIBC__ meillo@14: putenv("POSIXLY_CORRECT=1"); meillo@14: #endif meillo@14: while ((i = getopt(argc, argv, optstring)) != EOF) { meillo@14: switch (i) { meillo@14: case 'b': meillo@14: case 'c': meillo@14: case 'f': meillo@14: if (method && method != i) meillo@14: usage(); meillo@14: method = i; meillo@14: setlist(optarg); meillo@14: break; meillo@14: case 'd': meillo@14: setdelim(optarg); meillo@14: break; meillo@14: case 'n': meillo@14: nflag = 1; meillo@14: break; meillo@14: case 's': meillo@14: sflag = 1; meillo@14: break; meillo@14: default: meillo@14: usage(); meillo@14: } meillo@14: } meillo@14: /*if ((sflag && method != 'f') || (nflag && method != 'b')) meillo@14: usage();*/ meillo@14: if (method == 0) meillo@14: badlist(); meillo@14: if (argv[optind]) { meillo@14: for (i = optind; argv[i]; i++) meillo@14: errcnt |= fncut(argv[i]); meillo@14: } else meillo@14: errcnt |= fdcut(0); meillo@14: return errcnt; meillo@14: }