Mercurial > docs > cut
diff code/cut.c__heirloom.2012-05-20 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/cut.c__heirloom.2012-05-20 Tue May 12 06:46:59 2015 +0200 @@ -0,0 +1,405 @@ +/* + * cut - cut out fields of lines of files + * + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <limits.h> +#include <wchar.h> +#include <ctype.h> +#include <locale.h> + +#include "iblok.h" + +#if defined (__GLIBC__) && defined (_IO_putc_unlocked) +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif + +struct range { + struct range *r_nxt; + long r_min; + long r_max; +}; + +static unsigned errcnt; /* count of errors */ +static int method; /* one of b, c, f */ +static int nflag; /* character boundary bytes */ +static int sflag; /* suppress lines w/o delimiters */ +static char *progname; /* argv[0] to main() */ +static wchar_t wcdelim = '\t'; /* delimiter character */ +static const char *mbdelim = "\t";/* delimiter character */ +struct range *fields; /* range list */ +static int multibyte; /* multibyte LC_CTYPE */ + +#define next(wc, s) (multibyte ? mbtowc(&(wc), s, MB_LEN_MAX) :\ + ((wc) = *(s) & 0377, (wc) != 0)) + +void * +lrealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) { + write(2, "line too long\n", 14); + exit(076); + } + return p; +} + +void * +smalloc(size_t nbytes) +{ + void *p; + + if ((p = malloc(nbytes)) == NULL) { + write(2, "no memory\n", 11); + exit(077); + } + return p; +} + +static void +error(const char *s) +{ + fprintf(stderr, "%s: ERROR: %s\n", progname, s); + exit(2); +} + +static void +usage(void) +{ + error("Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ..."); +} + +static void +badlist(void) +{ + error(method == 'b' ? 
"bad list for b/c/f option" : + "bad list for c/f option"); +} + +static void +setdelim(const char *s) +{ + int n; + + if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0')) + error("no delimiter"); + mbdelim = s; +} + +static void +addrange(long m, long n) +{ + struct range *rp, *rq; + + rp = smalloc(sizeof *rp); + rp->r_nxt = NULL; + rp->r_min = m; + rp->r_max = n ? n : m; + if (fields) { + for (rq = fields; rq->r_nxt; rq = rq->r_nxt); + rq->r_nxt = rp; + } else + fields = rp; +} + +static int +have(long i) +{ + struct range *rp; + + for (rp = fields; rp; rp = rp->r_nxt) + if (i >= rp->r_min && i <= rp->r_max) + return 1; + return 0; +} + +#define mnreset() m = 0, n = 0, lp = &m + +static void +setlist(const char *s) +{ + char *cbuf, *cp; + long m, n; + long *lp; + + fields = NULL; + cbuf = smalloc(strlen(s) + 1); + mnreset(); + for (;;) { + if (*s == '-') { + if (m == 0) + m = 1; + n = LONG_MAX; + lp = &n; + s++; + } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') { + if (m) + addrange(m, n); + mnreset(); + if (*s == '\0') + break; + s++; + } else if (isdigit(*s & 0377)) { + cp = cbuf; + do + *cp++ = *s++; + while (isdigit(*s & 0377)); + *cp = '\0'; + *lp = strtol(cbuf, NULL, 10); + } else + badlist(); + } + if (fields == NULL) + error("no fields"); + free(cbuf); +} + +static void +cutb(struct iblok *ip) +{ + int c, i; + + i = 1; + while ((c = ib_get(ip)) != EOF) { + if (c == '\n') { + i = 1; + putc(c, stdout); + } else if (have(i++)) + putc(c, stdout); + } +} + +static void +cutbn(struct iblok *ip) +{ + char *cp; + int i, m, n; + wint_t wc; + + i = 1; + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { + if (wc == '\n') { + i = 1; + putc('\n', stdout); + } else { + if (have(i + n - 1)) + for (m = 0; m < n; m++) + putc(cp[m], stdout); + i += n; + } + } +} + +static void +cutc(struct iblok *ip) +{ + char *cp; + int i, n, m; + wint_t wc; + + i = 1; + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { + if (wc == '\n') { + i = 1; + putc('\n', stdout); + } 
else if (wc != WEOF && have(i++)) { + for (m = 0; m < n; m++) + putc(cp[m], stdout); + } + } +} + +static void +cutf(struct iblok *ip) +{ + static char *line; + static size_t linesize; + char *cp, *lp, *lq; + int c, i, n, m, gotcha; + char b; + wint_t wc; + const int incr = 128; + + if (linesize == 0) + line = smalloc(linesize = incr); + lp = line; + gotcha = 0; + i = 1; + do { + if (multibyte) + cp = ib_getw(ip, &wc, &n); + else { + if ((c = ib_get(ip)) != EOF) { + wc = c; + b = (char)c; + cp = &b; + } else { + wc = WEOF; + cp = NULL; + } + n = 1; + } + if (cp == NULL || wc == '\n' || wc == wcdelim) { + if (have(i) && (!sflag || gotcha || wc == wcdelim) || + (!sflag && i == 1 && + (cp == NULL || wc == '\n'))) { + if (gotcha) + for (m = 0; mbdelim[m]; m++) + putc(mbdelim[m], stdout); + for (lq = line; lq < lp; lq++) + putc(*lq, stdout); + gotcha = 1; + } + if (wc == '\n') { + if (gotcha) + putc('\n', stdout); + i = 1; + gotcha = 0; + } else + i++; + lp = line; + } else { + for (m = 0; m < n; m++) { + if (lp >= &line[linesize]) { + size_t diff = lp - line; + line = lrealloc(line, linesize += incr); + lp = &line[diff]; + } + *lp++ = cp[m]; + } + } + } while (cp != NULL); +} + +static int +fdcut(int fd) +{ + struct iblok *ip; + + ip = ib_alloc(fd, 0); + switch (method) { + case 'b': + if (nflag && multibyte) + cutbn(ip); + else + cutb(ip); + break; + case 'c': + if (multibyte) + cutc(ip); + else + cutb(ip); + break; + case 'f': + cutf(ip); + break; + } + ib_free(ip); + return 0; +} + +static int +fncut(const char *fn) +{ + int fd, res; + + if (fn[0] == '-' && fn[1] == '\0') + fd = 0; + else if ((fd = open(fn, O_RDONLY)) < 0) { + fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn); + return 1; + } + res = fdcut(fd); + if (fd) + close(fd); + return res; +} + +int +main(int argc, char **argv) +{ + const char optstring[] = "b:c:d:f:ns"; + int i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + multibyte = MB_CUR_MAX > 1; +#ifdef __GLIBC__ + 
putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'b': + case 'c': + case 'f': + if (method && method != i) + usage(); + method = i; + setlist(optarg); + break; + case 'd': + setdelim(optarg); + break; + case 'n': + nflag = 1; + break; + case 's': + sflag = 1; + break; + default: + usage(); + } + } + /*if ((sflag && method != 'f') || (nflag && method != 'b')) + usage();*/ + if (method == 0) + badlist(); + if (argv[optind]) { + for (i = optind; argv[i]; i++) + errcnt |= fncut(argv[i]); + } else + errcnt |= fdcut(0); + return errcnt; +}