docs/cut

diff code/cut.c__freebsd.2012-11-24 @ 14:21ad1c1548c4

Code ausgewaehlter Implementierungen eingefuegt. Das Datum entspricht dem Dateiaenderungsdatum.
author markus schnalke <meillo@marmaro.de>
date Tue, 12 May 2015 06:46:59 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/code/cut.c__freebsd.2012-11-24	Tue May 12 06:46:59 2015 +0200
     1.3 @@ -0,0 +1,479 @@
     1.4 +/*
     1.5 + * Copyright (c) 1989, 1993
     1.6 + *	The Regents of the University of California.  All rights reserved.
     1.7 + *
     1.8 + * This code is derived from software contributed to Berkeley by
     1.9 + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
    1.10 + *
    1.11 + * Redistribution and use in source and binary forms, with or without
    1.12 + * modification, are permitted provided that the following conditions
    1.13 + * are met:
    1.14 + * 1. Redistributions of source code must retain the above copyright
    1.15 + *    notice, this list of conditions and the following disclaimer.
    1.16 + * 2. Redistributions in binary form must reproduce the above copyright
    1.17 + *    notice, this list of conditions and the following disclaimer in the
    1.18 + *    documentation and/or other materials provided with the distribution.
    1.19 + * 4. Neither the name of the University nor the names of its contributors
    1.20 + *    may be used to endorse or promote products derived from this software
    1.21 + *    without specific prior written permission.
    1.22 + *
    1.23 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    1.24 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.25 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.26 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    1.27 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    1.28 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    1.29 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    1.30 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    1.31 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    1.32 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    1.33 + * SUCH DAMAGE.
    1.34 + */
    1.35 +
    1.36 +#ifndef lint
    1.37 +static const char copyright[] =
    1.38 +"@(#) Copyright (c) 1989, 1993\n\
    1.39 +	The Regents of the University of California.  All rights reserved.\n";
    1.40 +static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
    1.41 +#endif /* not lint */
    1.42 +#include <sys/cdefs.h>
    1.43 +__FBSDID("$FreeBSD$");
    1.44 +
    1.45 +#include <ctype.h>
    1.46 +#include <err.h>
    1.47 +#include <errno.h>
    1.48 +#include <limits.h>
    1.49 +#include <locale.h>
    1.50 +#include <stdio.h>
    1.51 +#include <stdlib.h>
    1.52 +#include <string.h>
    1.53 +#include <unistd.h>
    1.54 +#include <wchar.h>
    1.55 +
     1.56 +static int	bflag;		/* -b given: select by byte position */
     1.57 +static int	cflag;		/* -c given: select by character position */
     1.58 +static wchar_t	dchar;		/* field delimiter as a wide character (tab unless -d) */
     1.59 +static char	dcharmb[MB_LEN_MAX + 1];	/* the same delimiter as a multibyte string, for output */
     1.60 +static int	dflag;		/* -d given: explicit field delimiter */
     1.61 +static int	fflag;		/* -f given: select by field */
     1.62 +static int	nflag;		/* -n given (accepted only together with -b) */
     1.63 +static int	sflag;		/* -s given: drop lines that contain no delimiter */
     1.64 +static int	wflag;		/* -w given: blanks and tabs separate fields */
     1.65 +
     1.66 +static size_t	autostart, autostop, maxval;	/* set by get_list(): "-N" bound, "N-" bound, highest position to examine */
     1.67 +static char *	positions;	/* positions[i] is non-zero when item i (1-based) is selected */
     1.68 +
     1.69 +static int	b_cut(FILE *, const char *);
     1.70 +static int	b_n_cut(FILE *, const char *);
     1.71 +static int	c_cut(FILE *, const char *);
     1.72 +static int	f_cut(FILE *, const char *);
     1.73 +static void	get_list(char *);
     1.74 +static int	is_delim(wchar_t);
     1.75 +static void	needpos(size_t);
     1.76 +static void	usage(void);
    1.77 +
    1.78 +int
    1.79 +main(int argc, char *argv[])
    1.80 +{
    1.81 +	FILE *fp;
    1.82 +	int (*fcn)(FILE *, const char *);
    1.83 +	int ch, rval;
    1.84 +	size_t n;
    1.85 +
    1.86 +	setlocale(LC_ALL, "");
    1.87 +
    1.88 +	fcn = NULL;
    1.89 +	dchar = '\t';			/* default delimiter is \t */
    1.90 +	strcpy(dcharmb, "\t");
    1.91 +
    1.92 +	while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
    1.93 +		switch(ch) {
    1.94 +		case 'b':
    1.95 +			get_list(optarg);
    1.96 +			bflag = 1;
    1.97 +			break;
    1.98 +		case 'c':
    1.99 +			get_list(optarg);
   1.100 +			cflag = 1;
   1.101 +			break;
   1.102 +		case 'd':
   1.103 +			n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
   1.104 +			if (dchar == '\0' || n != strlen(optarg))
   1.105 +				errx(1, "bad delimiter");
   1.106 +			strcpy(dcharmb, optarg);
   1.107 +			dflag = 1;
   1.108 +			break;
   1.109 +		case 'f':
   1.110 +			get_list(optarg);
   1.111 +			fflag = 1;
   1.112 +			break;
   1.113 +		case 's':
   1.114 +			sflag = 1;
   1.115 +			break;
   1.116 +		case 'n':
   1.117 +			nflag = 1;
   1.118 +			break;
   1.119 +		case 'w':
   1.120 +			wflag = 1;
   1.121 +			break;
   1.122 +		case '?':
   1.123 +		default:
   1.124 +			usage();
   1.125 +		}
   1.126 +	argc -= optind;
   1.127 +	argv += optind;
   1.128 +
   1.129 +	if (fflag) {
   1.130 +		if (bflag || cflag || nflag || (wflag && dflag))
   1.131 +			usage();
   1.132 +	} else if (!(bflag || cflag) || dflag || sflag || wflag)
   1.133 +		usage();
   1.134 +	else if (!bflag && nflag)
   1.135 +		usage();
   1.136 +
   1.137 +	if (fflag)
   1.138 +		fcn = f_cut;
   1.139 +	else if (cflag)
   1.140 +		fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
   1.141 +	else if (bflag)
   1.142 +		fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
   1.143 +
   1.144 +	rval = 0;
   1.145 +	if (*argv)
   1.146 +		for (; *argv; ++argv) {
   1.147 +			if (strcmp(*argv, "-") == 0)
   1.148 +				rval |= fcn(stdin, "stdin");
   1.149 +			else {
   1.150 +				if (!(fp = fopen(*argv, "r"))) {
   1.151 +					warn("%s", *argv);
   1.152 +					rval = 1;
   1.153 +					continue;
   1.154 +				}
   1.155 +				fcn(fp, *argv);
   1.156 +				(void)fclose(fp);
   1.157 +			}
   1.158 +		}
   1.159 +	else
   1.160 +		rval = fcn(stdin, "stdin");
   1.161 +	exit(rval);
   1.162 +}
   1.163 +
   1.164 +static void
   1.165 +get_list(char *list)
   1.166 +{
   1.167 +	size_t setautostart, start, stop;
   1.168 +	char *pos;
   1.169 +	char *p;
   1.170 +
   1.171 +	/*
   1.172 +	 * set a byte in the positions array to indicate if a field or
   1.173 +	 * column is to be selected; use +1, it's 1-based, not 0-based.
   1.174 +	 * Numbers and number ranges may be overlapping, repeated, and in
   1.175 +	 * any order. We handle "-3-5" although there's no real reason to.
   1.176 +	 */
   1.177 +	for (; (p = strsep(&list, ", \t")) != NULL;) {
   1.178 +		setautostart = start = stop = 0;
   1.179 +		if (*p == '-') {
   1.180 +			++p;
   1.181 +			setautostart = 1;
   1.182 +		}
   1.183 +		if (isdigit((unsigned char)*p)) {
   1.184 +			start = stop = strtol(p, &p, 10);
   1.185 +			if (setautostart && start > autostart)
   1.186 +				autostart = start;
   1.187 +		}
   1.188 +		if (*p == '-') {
   1.189 +			if (isdigit((unsigned char)p[1]))
   1.190 +				stop = strtol(p + 1, &p, 10);
   1.191 +			if (*p == '-') {
   1.192 +				++p;
   1.193 +				if (!autostop || autostop > stop)
   1.194 +					autostop = stop;
   1.195 +			}
   1.196 +		}
   1.197 +		if (*p)
   1.198 +			errx(1, "[-bcf] list: illegal list value");
   1.199 +		if (!stop || !start)
   1.200 +			errx(1, "[-bcf] list: values may not include zero");
   1.201 +		if (maxval < stop) {
   1.202 +			maxval = stop;
   1.203 +			needpos(maxval + 1);
   1.204 +		}
   1.205 +		for (pos = positions + start; start++ <= stop; *pos++ = 1);
   1.206 +	}
   1.207 +
   1.208 +	/* overlapping ranges */
   1.209 +	if (autostop && maxval > autostop) {
   1.210 +		maxval = autostop;
   1.211 +		needpos(maxval + 1);
   1.212 +	}
   1.213 +
   1.214 +	/* set autostart */
   1.215 +	if (autostart)
   1.216 +		memset(positions + 1, '1', autostart);
   1.217 +}
    1.218 +/* needpos: make positions[] at least n bytes long, zero-filling the newly added part; exits through err() if realloc() fails. */
    1.219 +static void
    1.220 +needpos(size_t n)
    1.221 +{
    1.222 +	static size_t npos;	/* current allocated size of positions[], kept across calls */
    1.223 +	size_t oldnpos;
    1.224 +
    1.225 +	/* Grow the positions array to at least the specified size. */
    1.226 +	if (n > npos) {
    1.227 +		oldnpos = npos;
    1.228 +		if (npos == 0)	/* first allocation starts at exactly n */
    1.229 +			npos = n;
    1.230 +		while (n > npos)	/* later growth doubles until n fits */
    1.231 +			npos *= 2;
    1.232 +		if ((positions = realloc(positions, npos)) == NULL)
    1.233 +			err(1, "realloc");
    1.234 +		memset((char *)positions + oldnpos, 0, npos - oldnpos);	/* new entries start unselected */
    1.235 +	}
    1.236 +}
    1.237 +/* b_cut: copy the selected positions of each line of fp to stdout, one byte at a time; fname is unused; always returns 0. */
    1.238 +static int
    1.239 +b_cut(FILE *fp, const char *fname __unused)
    1.240 +{
    1.241 +	int ch, col;
    1.242 +	char *pos;
    1.243 +
    1.244 +	ch = 0;
    1.245 +	for (;;) {	/* one iteration per input line */
    1.246 +		pos = positions + 1;	/* positions[] is 1-based */
    1.247 +		for (col = maxval; col; --col) {	/* only the first maxval bytes are checked against positions[] */
    1.248 +			if ((ch = getc(fp)) == EOF)
    1.249 +				return (0);
    1.250 +			if (ch == '\n')
    1.251 +				break;
    1.252 +			if (*pos++)
    1.253 +				(void)putchar(ch);
    1.254 +		}
    1.255 +		if (ch != '\n') {	/* the line continues past maxval */
    1.256 +			if (autostop)	/* an open-ended "N-" range selects the rest of the line */
    1.257 +				while ((ch = getc(fp)) != EOF && ch != '\n')
    1.258 +					(void)putchar(ch);
    1.259 +			else	/* otherwise read and discard the rest */
    1.260 +				while ((ch = getc(fp)) != EOF && ch != '\n');
    1.261 +		}
    1.262 +		(void)putchar('\n');
    1.263 +	}
    1.264 +	return (0);
    1.265 +}
   1.266 +
    1.267 +/*
    1.268 + * Cut based on byte positions, taking care not to split multibyte characters.
    1.269 + * Although this function also handles the case where -n is not specified,
    1.270 + * b_cut() ought to be much faster.
    1.271 + */
    1.272 +static int
    1.273 +b_n_cut(FILE *fp, const char *fname)
    1.274 +{
    1.275 +	size_t col, i, lbuflen;
    1.276 +	char *lbuf;
    1.277 +	int canwrite, clen, warned;
    1.278 +	mbstate_t mbs;
    1.279 +
    1.280 +	memset(&mbs, 0, sizeof(mbs));
    1.281 +	warned = 0;
    1.282 +	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {	/* one iteration per input line */
    1.283 +		for (col = 0; lbuflen > 0; col += clen) {	/* col is the 0-based byte offset of the current character */
    1.284 +			if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) {	/* mbrlen() error codes are negative once stored in an int */
    1.285 +				if (!warned) {	/* report the problem only once per call */
    1.286 +					warn("%s", fname);
    1.287 +					warned = 1;
    1.288 +				}
    1.289 +				memset(&mbs, 0, sizeof(mbs));	/* reset the conversion state ... */
    1.290 +				clen = 1;	/* ... and treat the offending byte as a one-byte character */
    1.291 +			}
    1.292 +			if (clen == 0 || *lbuf == '\n')	/* stop at a NUL character or at the newline */
    1.293 +				break;
    1.294 +			if (col < maxval && !positions[1 + col]) {
    1.295 +				/*
    1.296 +				 * Print the character if (1) after an initial
    1.297 +				 * segment of un-selected bytes, the rest of
    1.298 +				 * it is selected, and (2) the last byte is
    1.299 +				 * selected.
    1.300 +				 */
    1.301 +				i = col;
    1.302 +				while (i < col + clen && i < maxval &&
    1.303 +				    !positions[1 + i])
    1.304 +					i++;
    1.305 +				canwrite = i < col + clen;
    1.306 +				for (; i < col + clen && i < maxval; i++)
    1.307 +					canwrite &= positions[1 + i];	/* bitwise AND with the stored flag value */
    1.308 +				if (canwrite)
    1.309 +					fwrite(lbuf, 1, clen, stdout);
    1.310 +			} else {
    1.311 +				/*
    1.312 +				 * Print the character if all of it has
    1.313 +				 * been selected.
    1.314 +				 */
    1.315 +				canwrite = 1;
    1.316 +				for (i = col; i < col + clen; i++)
    1.317 +					if ((i >= maxval && !autostop) ||
    1.318 +					    (i < maxval && !positions[1 + i])) {
    1.319 +						canwrite = 0;
    1.320 +						break;
    1.321 +					}
    1.322 +				if (canwrite)
    1.323 +					fwrite(lbuf, 1, clen, stdout);
    1.324 +			}
    1.325 +			lbuf += clen;
    1.326 +			lbuflen -= clen;
    1.327 +		}
    1.328 +		if (lbuflen > 0)	/* the loop stopped early (newline or NUL seen) */
    1.329 +			putchar('\n');
    1.330 +	}
    1.331 +	return (warned);	/* 1 if an invalid sequence was reported, else 0 */
    1.332 +}
    1.333 +/* c_cut: same loop as b_cut() but on wide characters; returns 1 (after warn()) if fp has its error flag set, else 0. */
    1.334 +static int
    1.335 +c_cut(FILE *fp, const char *fname)
    1.336 +{
    1.337 +	wint_t ch;
    1.338 +	int col;
    1.339 +	char *pos;
    1.340 +
    1.341 +	ch = 0;
    1.342 +	for (;;) {	/* one iteration per input line */
    1.343 +		pos = positions + 1;	/* positions[] is 1-based */
    1.344 +		for (col = maxval; col; --col) {	/* only the first maxval characters are checked against positions[] */
    1.345 +			if ((ch = getwc(fp)) == WEOF)	/* end of file or read/conversion error */
    1.346 +				goto out;
    1.347 +			if (ch == '\n')
    1.348 +				break;
    1.349 +			if (*pos++)
    1.350 +				(void)putwchar(ch);
    1.351 +		}
    1.352 +		if (ch != '\n') {	/* the line continues past maxval */
    1.353 +			if (autostop)	/* an open-ended "N-" range selects the rest of the line */
    1.354 +				while ((ch = getwc(fp)) != WEOF && ch != '\n')
    1.355 +					(void)putwchar(ch);
    1.356 +			else	/* otherwise read and discard the rest */
    1.357 +				while ((ch = getwc(fp)) != WEOF && ch != '\n');
    1.358 +		}
    1.359 +		(void)putwchar('\n');
    1.360 +	}
    1.361 +out:
    1.362 +	if (ferror(fp)) {	/* tell a real error apart from plain end of file */
    1.363 +		warn("%s", fname);
    1.364 +		return (1);
    1.365 +	}
    1.366 +	return (0);
    1.367 +}
    1.368 +/* is_delim: return 1 if ch separates fields (a blank or tab with -w, otherwise dchar), else 0. */
    1.369 +static int
    1.370 +is_delim(wchar_t ch)
    1.371 +{
    1.372 +	if (wflag) {	/* -w: any blank or tab is a separator */
    1.373 +		if (ch == ' ' || ch == '\t')
    1.374 +			return 1;
    1.375 +	} else {	/* otherwise only the configured delimiter counts */
    1.376 +		if (ch == dchar)
    1.377 +			return 1;
    1.378 +	}
    1.379 +	return 0;
    1.380 +}
    1.381 +/* f_cut: print the selected delimiter-separated fields of each line of fp; returns 1 after warning about an undecodable multibyte sequence, else 0. */
    1.382 +static int
    1.383 +f_cut(FILE *fp, const char *fname)
    1.384 +{
    1.385 +	wchar_t ch;
    1.386 +	int field, i, isdelim;
    1.387 +	char *pos, *p;
    1.388 +	int output;
    1.389 +	char *lbuf, *mlbuf;
    1.390 +	size_t clen, lbuflen, reallen;
    1.391 +
    1.392 +	mlbuf = NULL;	/* private copy of a line that lacks its newline, freed on return */
    1.393 +	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {	/* one iteration per input line */
    1.394 +		reallen = lbuflen;	/* length of the buffer actually scanned, see below */
    1.395 +		/* Assert EOL has a newline. */
    1.396 +		if (*(lbuf + lbuflen - 1) != '\n') {
    1.397 +			/* Can't have > 1 line with no trailing newline. */
    1.398 +			mlbuf = malloc(lbuflen + 1);
    1.399 +			if (mlbuf == NULL)
    1.400 +				err(1, "malloc");
    1.401 +			memcpy(mlbuf, lbuf, lbuflen);
    1.402 +			*(mlbuf + lbuflen) = '\n';	/* append the missing newline to the copy */
    1.403 +			lbuf = mlbuf;
    1.404 +			reallen++;
    1.405 +		}
    1.406 +		output = 0;	/* number of selected fields started on this line */
    1.407 +		for (isdelim = 0, p = lbuf;; p += clen) {	/* pass 1: does the line contain a delimiter? */
    1.408 +			clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL);
    1.409 +			if (clen == (size_t)-1 || clen == (size_t)-2) {	/* invalid or truncated multibyte sequence */
    1.410 +				warnc(EILSEQ, "%s", fname);
    1.411 +				free(mlbuf);
    1.412 +				return (1);
    1.413 +			}
    1.414 +			if (clen == 0)	/* mbrtowc() returns 0 for a NUL; step over its one byte */
    1.415 +				clen = 1;
    1.416 +			/* this should work if newline is delimiter */
    1.417 +			if (is_delim(ch))
    1.418 +				isdelim = 1;
    1.419 +			if (ch == '\n') {
    1.420 +				if (!isdelim && !sflag)	/* no delimiter: pass the line through unless -s */
    1.421 +					(void)fwrite(lbuf, lbuflen, 1, stdout);
    1.422 +				break;
    1.423 +			}
    1.424 +		}
    1.425 +		if (!isdelim)	/* already dealt with above */
    1.426 +			continue;
    1.427 +
    1.428 +		pos = positions + 1;	/* positions[] is 1-based */
    1.429 +		for (field = maxval, p = lbuf; field; --field, ++pos) {	/* pass 2: walk at most maxval fields */
    1.430 +			if (*pos && output++)	/* selected field that is not the first one printed */
    1.431 +				for (i = 0; dcharmb[i] != '\0'; i++)
    1.432 +					putchar(dcharmb[i]);
    1.433 +			for (;;) {	/* consume one field, printing it if selected */
    1.434 +				clen = mbrtowc(&ch, p, lbuf + reallen - p,
    1.435 +				    NULL);
    1.436 +				if (clen == (size_t)-1 || clen == (size_t)-2) {
    1.437 +					warnc(EILSEQ, "%s", fname);
    1.438 +					free(mlbuf);
    1.439 +					return (1);
    1.440 +				}
    1.441 +				if (clen == 0)
    1.442 +					clen = 1;
    1.443 +				p += clen;
    1.444 +				if (ch == '\n' || is_delim(ch)) {	/* end of this field */
    1.445 +					/* compress whitespace */
    1.446 +					if (wflag && ch != '\n')
    1.447 +						while (is_delim(*p))
    1.448 +							p++;
    1.449 +					break;
    1.450 +				}
    1.451 +				if (*pos)
    1.452 +					for (i = 0; i < (int)clen; i++)
    1.453 +						putchar(p[i - clen]);	/* p is already past the character: print its clen bytes */
    1.454 +			}
    1.455 +			if (ch == '\n')	/* end of line reached */
    1.456 +				break;
    1.457 +		}
    1.458 +		if (ch != '\n') {	/* maxval fields consumed and text remains */
    1.459 +			if (autostop) {	/* an open-ended "N-" range selects the remainder */
    1.460 +				if (output)
    1.461 +					for (i = 0; dcharmb[i] != '\0'; i++)
    1.462 +						putchar(dcharmb[i]);
    1.463 +				for (; (ch = *p) != '\n'; ++p)
    1.464 +					(void)putchar(ch);
    1.465 +			} else	/* otherwise skip to the end of the line */
    1.466 +				for (; (ch = *p) != '\n'; ++p);
    1.467 +		}
    1.468 +		(void)putchar('\n');
    1.469 +	}
    1.470 +	free(mlbuf);
    1.471 +	return (0);
    1.472 +}
    1.473 +/* usage: print the three synopsis lines to stderr and exit with status 1. */
    1.474 +static void
    1.475 +usage(void)
    1.476 +{
    1.477 +	(void)fprintf(stderr, "%s\n%s\n%s\n",
    1.478 +		"usage: cut -b list [-n] [file ...]",
    1.479 +		"       cut -c list [file ...]",
    1.480 +		"       cut -f list [-s] [-w | -d delim] [file ...]");
    1.481 +	exit(1);
    1.482 +}