docs/cut

diff code/cut.c__heirloom.2012-05-20 @ 14:21ad1c1548c4

Code ausgewaehlter Implementierungen eingefuegt. Das Datum entspricht dem Dateiaenderungsdatum.
author markus schnalke <meillo@marmaro.de>
date Tue, 12 May 2015 06:46:59 +0200
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/code/cut.c__heirloom.2012-05-20	Tue May 12 06:46:59 2015 +0200
     1.3 @@ -0,0 +1,405 @@
     1.4 +/*
     1.5 + * cut - cut out fields of lines of files
     1.6 + *
     1.7 + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
     1.8 + */
     1.9 +/*
    1.10 + * Copyright (c) 2003 Gunnar Ritter
    1.11 + *
    1.12 + * This software is provided 'as-is', without any express or implied
    1.13 + * warranty. In no event will the authors be held liable for any damages
    1.14 + * arising from the use of this software.
    1.15 + *
    1.16 + * Permission is granted to anyone to use this software for any purpose,
    1.17 + * including commercial applications, and to alter it and redistribute
    1.18 + * it freely, subject to the following restrictions:
    1.19 + *
    1.20 + * 1. The origin of this software must not be misrepresented; you must not
    1.21 + *    claim that you wrote the original software. If you use this software
    1.22 + *    in a product, an acknowledgment in the product documentation would be
    1.23 + *    appreciated but is not required.
    1.24 + *
    1.25 + * 2. Altered source versions must be plainly marked as such, and must not be
    1.26 + *    misrepresented as being the original software.
    1.27 + *
    1.28 + * 3. This notice may not be removed or altered from any source distribution.
    1.29 + */
    1.30 +
    /*
     * SCCS identification string. USED expands to a GCC attribute where one
     * is available (`used' from GCC 3.4 on, `unused' for older GCC) and to
     * nothing for other compilers.
     */
    #if defined(__GNUC__)
    #  if (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4
    #    define	USED	__attribute__ ((used))
    #  else
    #    define	USED	__attribute__ ((unused))
    #  endif
    #else
    #  define	USED
    #endif
    static const char sccsid[] USED = "@(#)cut.sl	1.20 (gritter) 5/29/05";
    1.39 +
    1.40 +#include	<sys/types.h>
    1.41 +#include	<sys/stat.h>
    1.42 +#include	<fcntl.h>
    1.43 +#include	<unistd.h>
    1.44 +#include	<stdio.h>
    1.45 +#include	<string.h>
    1.46 +#include	<stdlib.h>
    1.47 +#include	<errno.h>
    1.48 +#include	<libgen.h>
    1.49 +#include	<limits.h>
    1.50 +#include	<wchar.h>
    1.51 +#include	<ctype.h>
    1.52 +#include	<locale.h>
    1.53 +
    1.54 +#include	"iblok.h"
    1.55 +
    /*
     * On glibc, replace putc() by _IO_putc_unlocked() whenever the C library
     * provides that macro.
     */
    #if defined(__GLIBC__) && defined(_IO_putc_unlocked)
    #  undef	putc
    #  define	putc(ch, stream)	_IO_putc_unlocked(ch, stream)
    #endif
    1.60 +
    /*
     * One element of the selection list: the inclusive interval
     * [r_min, r_max]. Elements are chained through r_nxt.
     */
    struct range {
    	struct range	*r_nxt;		/* next element, NULL at the end */
    	long		r_min;		/* lowest selected position */
    	long		r_max;		/* highest selected position */
    };
    1.66 +
    /* File-scope state shared by option parsing and the cut routines. */
    static unsigned	errcnt;			/* failure status returned by main() */
    static int	method;			/* 'b', 'c' or 'f'; 0 until one is given */
    static int	nflag;			/* -n: character boundary bytes */
    static int	sflag;			/* -s: suppress lines w/o delimiters */
    static char	*progname;		/* basename of argv[0], for messages */
    static wchar_t	wcdelim = '\t';		/* delimiter as a wide character */
    static const char	*mbdelim = "\t";/* delimiter as the bytes written out */
    static struct range	*fields;	/* head of the selection list */
    static int	multibyte;		/* nonzero if MB_CUR_MAX > 1 */

    /*
     * Decode the character at the start of string s into wc.
     * In a multibyte locale this is mbtowc(), so the value is the number of
     * bytes consumed, 0 for the terminating NUL, or -1 for an invalid
     * sequence. Otherwise wc receives the byte value and the result is 1,
     * or 0 for the terminating NUL.
     */
    #define	next(wc, s)	(multibyte ? mbtowc(&(wc), (s), MB_LEN_MAX) :\
    				((wc) = *(s) & 0377, (wc) != 0))
    1.79 +
    /*
     * realloc() wrapper that never returns NULL: if the request cannot be
     * satisfied, "line too long" is written to file descriptor 2 and the
     * process exits with status 076.
     */
    void *
    lrealloc(void *vp, size_t nbytes)
    {
    	static const char	msg[] = "line too long\n";
    	void	*np = realloc(vp, nbytes);

    	if (np != NULL)
    		return np;
    	write(2, msg, sizeof msg - 1);
    	exit(076);
    }
    1.91 +
    /*
     * malloc() wrapper that never returns NULL: if the request cannot be
     * satisfied, "no memory" is written to file descriptor 2 and the
     * process exits with status 077.
     */
    void *
    smalloc(size_t nbytes)
    {
    	static const char	msg[] = "no memory\n";
    	void	*p;

    	if ((p = malloc(nbytes)) == NULL) {
    		/* sizeof - 1: emit the text only, not its terminating NUL */
    		write(2, msg, sizeof msg - 1);
    		exit(077);
    	}
    	return p;
    }
   1.103 +
   1.104 +static void
   1.105 +error(const char *s)
   1.106 +{
   1.107 +	fprintf(stderr, "%s: ERROR: %s\n", progname, s);
   1.108 +	exit(2);
   1.109 +}
   1.110 +
   /*
    * Print the synopsis through error(), which terminates the process.
    */
   static void
   usage(void)
   {
   	static const char	synopsis[] =
   		"Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...";

   	error(synopsis);
   }
   1.116 +
   1.117 +static void
   1.118 +badlist(void)
   1.119 +{
   1.120 +	error(method == 'b' ? "bad list for b/c/f option" : 
   1.121 +			"bad list for c/f option");
   1.122 +}
   1.123 +
   1.124 +static void
   1.125 +setdelim(const char *s)
   1.126 +{
   1.127 +	int	n;
   1.128 +
   1.129 +	if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0'))
   1.130 +		error("no delimiter");
   1.131 +	mbdelim = s;
   1.132 +}
   1.133 +
   1.134 +static void
   1.135 +addrange(long m, long n)
   1.136 +{
   1.137 +	struct range	*rp, *rq;
   1.138 +
   1.139 +	rp = smalloc(sizeof *rp);
   1.140 +	rp->r_nxt = NULL;
   1.141 +	rp->r_min = m;
   1.142 +	rp->r_max = n ? n : m;
   1.143 +	if (fields) {
   1.144 +		for (rq = fields; rq->r_nxt; rq = rq->r_nxt);
   1.145 +		rq->r_nxt = rp;
   1.146 +	} else
   1.147 +		fields = rp;
   1.148 +}
   1.149 +
   1.150 +static int
   1.151 +have(long i)
   1.152 +{
   1.153 +	struct range	*rp;
   1.154 +
   1.155 +	for (rp = fields; rp; rp = rp->r_nxt)
   1.156 +		if (i >= rp->r_min && i <= rp->r_max)
   1.157 +			return 1;
   1.158 +	return 0;
   1.159 +}
   1.160 +
   1.161 +#define	mnreset()	m = 0, n = 0, lp = &m
   1.162 +
   1.163 +static void
   1.164 +setlist(const char *s)
   1.165 +{
   1.166 +	char	*cbuf, *cp;
   1.167 +	long	m, n;
   1.168 +	long	*lp;
   1.169 +
   1.170 +	fields = NULL;
   1.171 +	cbuf = smalloc(strlen(s) + 1);
   1.172 +	mnreset();
   1.173 +	for (;;) {
   1.174 +		if (*s == '-') {
   1.175 +			if (m == 0)
   1.176 +				m = 1;
   1.177 +			n = LONG_MAX;
   1.178 +			lp = &n;
   1.179 +			s++;
   1.180 +		} else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') {
   1.181 +			if (m)
   1.182 +				addrange(m, n);
   1.183 +			mnreset();
   1.184 +			if (*s == '\0')
   1.185 +				break;
   1.186 +			s++;
   1.187 +		} else if (isdigit(*s & 0377)) {
   1.188 +			cp = cbuf;
   1.189 +			do
   1.190 +				*cp++ = *s++;
   1.191 +			while (isdigit(*s & 0377));
   1.192 +			*cp = '\0';
   1.193 +			*lp = strtol(cbuf, NULL, 10);
   1.194 +		} else
   1.195 +			badlist();
   1.196 +	}
   1.197 +	if (fields == NULL)
   1.198 +		error("no fields");
   1.199 +	free(cbuf);
   1.200 +}
   1.201 +
   /*
    * Byte mode: copy to stdout every byte whose position within its line is
    * selected by have(). Positions start at 1 and restart after each
    * newline; newlines themselves are always copied.
    *
    * The position is kept in a long, the type of the range bounds, so that
    * it cannot overflow an int on very long lines.
    */
   static void
   cutb(struct iblok *ip)
   {
   	long	pos = 1;
   	int	c;

   	while ((c = ib_get(ip)) != EOF) {
   		if (c == '\n') {
   			pos = 1;
   			putc(c, stdout);
   		} else if (have(pos++))
   			putc(c, stdout);
   	}
   }
   1.216 +
   /*
    * Byte mode with -n: input is read one character at a time through
    * ib_getw(), which hands back a pointer to the character's bytes and
    * their count n. A character is written, with all of its bytes, only if
    * the position of its last byte is selected; byte positions start at 1
    * on every line. Newlines are always written.
    *
    * The position is kept in a long, the type of the range bounds, so that
    * it cannot overflow an int on very long lines.
    */
   static void
   cutbn(struct iblok *ip)
   {
   	char	*cp;
   	long	pos = 1;
   	int	k, n;
   	wint_t	wc;

   	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
   		if (wc == '\n') {
   			pos = 1;
   			putc('\n', stdout);
   		} else {
   			if (have(pos + n - 1))
   				for (k = 0; k < n; k++)
   					putc(cp[k], stdout);
   			pos += n;
   		}
   	}
   }
   1.237 +
   /*
    * Character mode for multibyte locales: input is read one character at
    * a time through ib_getw(); a character is written, with all n of its
    * bytes, if its position within the line is selected. Positions start
    * at 1 on every line. Input for which ib_getw() reports WEOF is neither
    * counted nor written. Newlines are always written.
    *
    * The position is kept in a long, the type of the range bounds, so that
    * it cannot overflow an int on very long lines.
    */
   static void
   cutc(struct iblok *ip)
   {
   	char	*cp;
   	long	pos = 1;
   	int	k, n;
   	wint_t	wc;

   	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
   		if (wc == '\n') {
   			pos = 1;
   			putc('\n', stdout);
   		} else if (wc != WEOF && have(pos++)) {
   			for (k = 0; k < n; k++)
   				putc(cp[k], stdout);
   		}
   	}
   }
   1.256 +
   1.257 +static void
   1.258 +cutf(struct iblok *ip)
   1.259 +{
   1.260 +	static char	*line;
   1.261 +	static size_t	linesize;
   1.262 +	char	*cp, *lp, *lq;
   1.263 +	int	c, i, n, m, gotcha;
   1.264 +	char	b;
   1.265 +	wint_t	wc;
   1.266 +	const int	incr = 128;
   1.267 +
   1.268 +	if (linesize == 0)
   1.269 +		line = smalloc(linesize = incr);
   1.270 +	lp = line;
   1.271 +	gotcha = 0;
   1.272 +	i = 1;
   1.273 +	do {
   1.274 +		if (multibyte)
   1.275 +			cp = ib_getw(ip, &wc, &n);
   1.276 +		else {
   1.277 +			if ((c = ib_get(ip)) != EOF) {
   1.278 +				wc = c;
   1.279 +				b = (char)c;
   1.280 +				cp = &b;
   1.281 +			} else {
   1.282 +				wc = WEOF;
   1.283 +				cp = NULL;
   1.284 +			}
   1.285 +			n = 1;
   1.286 +		}
   1.287 +		if (cp == NULL || wc == '\n' || wc == wcdelim) {
   1.288 +			if (have(i) && (!sflag || gotcha || wc == wcdelim) ||
   1.289 +					(!sflag && i == 1 &&
   1.290 +						(cp == NULL || wc == '\n'))) {
   1.291 +				if (gotcha)
   1.292 +					for (m = 0; mbdelim[m]; m++)
   1.293 +						putc(mbdelim[m], stdout);
   1.294 +				for (lq = line; lq < lp; lq++)
   1.295 +					putc(*lq, stdout);
   1.296 +				gotcha = 1;
   1.297 +			}
   1.298 +			if (wc == '\n') {
   1.299 +				if (gotcha)
   1.300 +					putc('\n', stdout);
   1.301 +				i = 1;
   1.302 +				gotcha = 0;
   1.303 +			} else
   1.304 +				i++;
   1.305 +			lp = line;
   1.306 +		} else {
   1.307 +			for (m = 0; m < n; m++) {
   1.308 +				if (lp >= &line[linesize]) {
   1.309 +					size_t	diff = lp - line;
   1.310 +					line = lrealloc(line, linesize += incr);
   1.311 +					lp = &line[diff];
   1.312 +				}
   1.313 +				*lp++ = cp[m];
   1.314 +			}
   1.315 +		}
   1.316 +	} while (cp != NULL);
   1.317 +}
   1.318 +
   1.319 +static int
   1.320 +fdcut(int fd)
   1.321 +{
   1.322 +	struct iblok	*ip;
   1.323 +
   1.324 +	ip = ib_alloc(fd, 0);
   1.325 +	switch (method) {
   1.326 +	case 'b':
   1.327 +		if (nflag && multibyte)
   1.328 +			cutbn(ip);
   1.329 +		else
   1.330 +			cutb(ip);
   1.331 +		break;
   1.332 +	case 'c':
   1.333 +		if (multibyte)
   1.334 +			cutc(ip);
   1.335 +		else
   1.336 +			cutb(ip);
   1.337 +		break;
   1.338 +	case 'f':
   1.339 +		cutf(ip);
   1.340 +		break;
   1.341 +	}
   1.342 +	ib_free(ip);
   1.343 +	return 0;
   1.344 +}
   1.345 +
   1.346 +static int
   1.347 +fncut(const char *fn)
   1.348 +{
   1.349 +	int	fd, res;
   1.350 +
   1.351 +	if (fn[0] == '-' && fn[1] == '\0')
   1.352 +		fd = 0;
   1.353 +	else if ((fd = open(fn, O_RDONLY)) < 0) {
   1.354 +		fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn);
   1.355 +		return 1;
   1.356 +	}
   1.357 +	res = fdcut(fd);
   1.358 +	if (fd)
   1.359 +		close(fd);
   1.360 +	return res;
   1.361 +}
   1.362 +
   1.363 +int
   1.364 +main(int argc, char **argv)
   1.365 +{
   1.366 +	const char	optstring[] = "b:c:d:f:ns";
   1.367 +	int	i;
   1.368 +
   1.369 +	progname = basename(argv[0]);
   1.370 +	setlocale(LC_CTYPE, "");
   1.371 +	multibyte = MB_CUR_MAX > 1;
   1.372 +#ifdef	__GLIBC__
   1.373 +	putenv("POSIXLY_CORRECT=1");
   1.374 +#endif
   1.375 +	while ((i = getopt(argc, argv, optstring)) != EOF) {
   1.376 +		switch (i) {
   1.377 +		case 'b':
   1.378 +		case 'c':
   1.379 +		case 'f':
   1.380 +			if (method && method != i)
   1.381 +				usage();
   1.382 +			method = i;
   1.383 +			setlist(optarg);
   1.384 +			break;
   1.385 +		case 'd':
   1.386 +			setdelim(optarg);
   1.387 +			break;
   1.388 +		case 'n':
   1.389 +			nflag = 1;
   1.390 +			break;
   1.391 +		case 's':
   1.392 +			sflag = 1;
   1.393 +			break;
   1.394 +		default:
   1.395 +			usage();
   1.396 +		}
   1.397 +	}
   1.398 +	/*if ((sflag && method != 'f') || (nflag && method != 'b'))
   1.399 +		usage();*/
   1.400 +	if (method == 0)
   1.401 +		badlist();
   1.402 +	if (argv[optind]) {
   1.403 +		for (i = optind; argv[i]; i++)
   1.404 +			errcnt |= fncut(argv[i]);
   1.405 +	} else
   1.406 +		errcnt |= fdcut(0);
   1.407 +	return errcnt;
   1.408 +}