docs/cut
diff code/cut.c__freebsd.2012-11-24 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/code/cut.c__freebsd.2012-11-24 Tue May 12 06:46:59 2015 +0200 1.3 @@ -0,0 +1,479 @@ 1.4 +/* 1.5 + * Copyright (c) 1989, 1993 1.6 + * The Regents of the University of California. All rights reserved. 1.7 + * 1.8 + * This code is derived from software contributed to Berkeley by 1.9 + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 1.10 + * 1.11 + * Redistribution and use in source and binary forms, with or without 1.12 + * modification, are permitted provided that the following conditions 1.13 + * are met: 1.14 + * 1. Redistributions of source code must retain the above copyright 1.15 + * notice, this list of conditions and the following disclaimer. 1.16 + * 2. Redistributions in binary form must reproduce the above copyright 1.17 + * notice, this list of conditions and the following disclaimer in the 1.18 + * documentation and/or other materials provided with the distribution. 1.19 + * 4. Neither the name of the University nor the names of its contributors 1.20 + * may be used to endorse or promote products derived from this software 1.21 + * without specific prior written permission. 1.22 + * 1.23 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1.24 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.25 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.26 + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 1.27 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1.28 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 1.29 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 1.30 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 1.31 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 1.32 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 1.33 + * SUCH DAMAGE. 1.34 + */ 1.35 + 1.36 +#ifndef lint 1.37 +static const char copyright[] = 1.38 +"@(#) Copyright (c) 1989, 1993\n\ 1.39 + The Regents of the University of California. All rights reserved.\n"; 1.40 +static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 1.41 +#endif /* not lint */ 1.42 +#include <sys/cdefs.h> 1.43 +__FBSDID("$FreeBSD$"); 1.44 + 1.45 +#include <ctype.h> 1.46 +#include <err.h> 1.47 +#include <errno.h> 1.48 +#include <limits.h> 1.49 +#include <locale.h> 1.50 +#include <stdio.h> 1.51 +#include <stdlib.h> 1.52 +#include <string.h> 1.53 +#include <unistd.h> 1.54 +#include <wchar.h> 1.55 + 1.56 +static int bflag; 1.57 +static int cflag; 1.58 +static wchar_t dchar; 1.59 +static char dcharmb[MB_LEN_MAX + 1]; 1.60 +static int dflag; 1.61 +static int fflag; 1.62 +static int nflag; 1.63 +static int sflag; 1.64 +static int wflag; 1.65 + 1.66 +static size_t autostart, autostop, maxval; 1.67 +static char * positions; 1.68 + 1.69 +static int b_cut(FILE *, const char *); 1.70 +static int b_n_cut(FILE *, const char *); 1.71 +static int c_cut(FILE *, const char *); 1.72 +static int f_cut(FILE *, const char *); 1.73 +static void get_list(char *); 1.74 +static int is_delim(wchar_t); 1.75 +static void needpos(size_t); 1.76 +static void usage(void); 1.77 + 1.78 +int 1.79 +main(int argc, char *argv[]) 1.80 +{ 1.81 + FILE *fp; 1.82 + int (*fcn)(FILE *, const char *); 1.83 + int ch, rval; 1.84 + size_t n; 
1.85 + 1.86 + setlocale(LC_ALL, ""); 1.87 + 1.88 + fcn = NULL; 1.89 + dchar = '\t'; /* default delimiter is \t */ 1.90 + strcpy(dcharmb, "\t"); 1.91 + 1.92 + while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1) 1.93 + switch(ch) { 1.94 + case 'b': 1.95 + get_list(optarg); 1.96 + bflag = 1; 1.97 + break; 1.98 + case 'c': 1.99 + get_list(optarg); 1.100 + cflag = 1; 1.101 + break; 1.102 + case 'd': 1.103 + n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL); 1.104 + if (dchar == '\0' || n != strlen(optarg)) 1.105 + errx(1, "bad delimiter"); 1.106 + strcpy(dcharmb, optarg); 1.107 + dflag = 1; 1.108 + break; 1.109 + case 'f': 1.110 + get_list(optarg); 1.111 + fflag = 1; 1.112 + break; 1.113 + case 's': 1.114 + sflag = 1; 1.115 + break; 1.116 + case 'n': 1.117 + nflag = 1; 1.118 + break; 1.119 + case 'w': 1.120 + wflag = 1; 1.121 + break; 1.122 + case '?': 1.123 + default: 1.124 + usage(); 1.125 + } 1.126 + argc -= optind; 1.127 + argv += optind; 1.128 + 1.129 + if (fflag) { 1.130 + if (bflag || cflag || nflag || (wflag && dflag)) 1.131 + usage(); 1.132 + } else if (!(bflag || cflag) || dflag || sflag || wflag) 1.133 + usage(); 1.134 + else if (!bflag && nflag) 1.135 + usage(); 1.136 + 1.137 + if (fflag) 1.138 + fcn = f_cut; 1.139 + else if (cflag) 1.140 + fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; 1.141 + else if (bflag) 1.142 + fcn = nflag && MB_CUR_MAX > 1 ? 
b_n_cut : b_cut; 1.143 + 1.144 + rval = 0; 1.145 + if (*argv) 1.146 + for (; *argv; ++argv) { 1.147 + if (strcmp(*argv, "-") == 0) 1.148 + rval |= fcn(stdin, "stdin"); 1.149 + else { 1.150 + if (!(fp = fopen(*argv, "r"))) { 1.151 + warn("%s", *argv); 1.152 + rval = 1; 1.153 + continue; 1.154 + } 1.155 + fcn(fp, *argv); 1.156 + (void)fclose(fp); 1.157 + } 1.158 + } 1.159 + else 1.160 + rval = fcn(stdin, "stdin"); 1.161 + exit(rval); 1.162 +} 1.163 + 1.164 +static void 1.165 +get_list(char *list) 1.166 +{ 1.167 + size_t setautostart, start, stop; 1.168 + char *pos; 1.169 + char *p; 1.170 + 1.171 + /* 1.172 + * set a byte in the positions array to indicate if a field or 1.173 + * column is to be selected; use +1, it's 1-based, not 0-based. 1.174 + * Numbers and number ranges may be overlapping, repeated, and in 1.175 + * any order. We handle "-3-5" although there's no real reason to. 1.176 + */ 1.177 + for (; (p = strsep(&list, ", \t")) != NULL;) { 1.178 + setautostart = start = stop = 0; 1.179 + if (*p == '-') { 1.180 + ++p; 1.181 + setautostart = 1; 1.182 + } 1.183 + if (isdigit((unsigned char)*p)) { 1.184 + start = stop = strtol(p, &p, 10); 1.185 + if (setautostart && start > autostart) 1.186 + autostart = start; 1.187 + } 1.188 + if (*p == '-') { 1.189 + if (isdigit((unsigned char)p[1])) 1.190 + stop = strtol(p + 1, &p, 10); 1.191 + if (*p == '-') { 1.192 + ++p; 1.193 + if (!autostop || autostop > stop) 1.194 + autostop = stop; 1.195 + } 1.196 + } 1.197 + if (*p) 1.198 + errx(1, "[-bcf] list: illegal list value"); 1.199 + if (!stop || !start) 1.200 + errx(1, "[-bcf] list: values may not include zero"); 1.201 + if (maxval < stop) { 1.202 + maxval = stop; 1.203 + needpos(maxval + 1); 1.204 + } 1.205 + for (pos = positions + start; start++ <= stop; *pos++ = 1); 1.206 + } 1.207 + 1.208 + /* overlapping ranges */ 1.209 + if (autostop && maxval > autostop) { 1.210 + maxval = autostop; 1.211 + needpos(maxval + 1); 1.212 + } 1.213 + 1.214 + /* set autostart */ 1.215 + if 
(autostart) 1.216 + memset(positions + 1, '1', autostart); 1.217 +} 1.218 + 1.219 +static void 1.220 +needpos(size_t n) 1.221 +{ 1.222 + static size_t npos; 1.223 + size_t oldnpos; 1.224 + 1.225 + /* Grow the positions array to at least the specified size. */ 1.226 + if (n > npos) { 1.227 + oldnpos = npos; 1.228 + if (npos == 0) 1.229 + npos = n; 1.230 + while (n > npos) 1.231 + npos *= 2; 1.232 + if ((positions = realloc(positions, npos)) == NULL) 1.233 + err(1, "realloc"); 1.234 + memset((char *)positions + oldnpos, 0, npos - oldnpos); 1.235 + } 1.236 +} 1.237 + 1.238 +static int 1.239 +b_cut(FILE *fp, const char *fname __unused) 1.240 +{ 1.241 + int ch, col; 1.242 + char *pos; 1.243 + 1.244 + ch = 0; 1.245 + for (;;) { 1.246 + pos = positions + 1; 1.247 + for (col = maxval; col; --col) { 1.248 + if ((ch = getc(fp)) == EOF) 1.249 + return (0); 1.250 + if (ch == '\n') 1.251 + break; 1.252 + if (*pos++) 1.253 + (void)putchar(ch); 1.254 + } 1.255 + if (ch != '\n') { 1.256 + if (autostop) 1.257 + while ((ch = getc(fp)) != EOF && ch != '\n') 1.258 + (void)putchar(ch); 1.259 + else 1.260 + while ((ch = getc(fp)) != EOF && ch != '\n'); 1.261 + } 1.262 + (void)putchar('\n'); 1.263 + } 1.264 + return (0); 1.265 +} 1.266 + 1.267 +/* 1.268 + * Cut based on byte positions, taking care not to split multibyte characters. 1.269 + * Although this function also handles the case where -n is not specified, 1.270 + * b_cut() ought to be much faster. 
1.271 + */ 1.272 +static int 1.273 +b_n_cut(FILE *fp, const char *fname) 1.274 +{ 1.275 + size_t col, i, lbuflen; 1.276 + char *lbuf; 1.277 + int canwrite, clen, warned; 1.278 + mbstate_t mbs; 1.279 + 1.280 + memset(&mbs, 0, sizeof(mbs)); 1.281 + warned = 0; 1.282 + while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 1.283 + for (col = 0; lbuflen > 0; col += clen) { 1.284 + if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 1.285 + if (!warned) { 1.286 + warn("%s", fname); 1.287 + warned = 1; 1.288 + } 1.289 + memset(&mbs, 0, sizeof(mbs)); 1.290 + clen = 1; 1.291 + } 1.292 + if (clen == 0 || *lbuf == '\n') 1.293 + break; 1.294 + if (col < maxval && !positions[1 + col]) { 1.295 + /* 1.296 + * Print the character if (1) after an initial 1.297 + * segment of un-selected bytes, the rest of 1.298 + * it is selected, and (2) the last byte is 1.299 + * selected. 1.300 + */ 1.301 + i = col; 1.302 + while (i < col + clen && i < maxval && 1.303 + !positions[1 + i]) 1.304 + i++; 1.305 + canwrite = i < col + clen; 1.306 + for (; i < col + clen && i < maxval; i++) 1.307 + canwrite &= positions[1 + i]; 1.308 + if (canwrite) 1.309 + fwrite(lbuf, 1, clen, stdout); 1.310 + } else { 1.311 + /* 1.312 + * Print the character if all of it has 1.313 + * been selected. 
1.314 + */ 1.315 + canwrite = 1; 1.316 + for (i = col; i < col + clen; i++) 1.317 + if ((i >= maxval && !autostop) || 1.318 + (i < maxval && !positions[1 + i])) { 1.319 + canwrite = 0; 1.320 + break; 1.321 + } 1.322 + if (canwrite) 1.323 + fwrite(lbuf, 1, clen, stdout); 1.324 + } 1.325 + lbuf += clen; 1.326 + lbuflen -= clen; 1.327 + } 1.328 + if (lbuflen > 0) 1.329 + putchar('\n'); 1.330 + } 1.331 + return (warned); 1.332 +} 1.333 + 1.334 +static int 1.335 +c_cut(FILE *fp, const char *fname) 1.336 +{ 1.337 + wint_t ch; 1.338 + int col; 1.339 + char *pos; 1.340 + 1.341 + ch = 0; 1.342 + for (;;) { 1.343 + pos = positions + 1; 1.344 + for (col = maxval; col; --col) { 1.345 + if ((ch = getwc(fp)) == WEOF) 1.346 + goto out; 1.347 + if (ch == '\n') 1.348 + break; 1.349 + if (*pos++) 1.350 + (void)putwchar(ch); 1.351 + } 1.352 + if (ch != '\n') { 1.353 + if (autostop) 1.354 + while ((ch = getwc(fp)) != WEOF && ch != '\n') 1.355 + (void)putwchar(ch); 1.356 + else 1.357 + while ((ch = getwc(fp)) != WEOF && ch != '\n'); 1.358 + } 1.359 + (void)putwchar('\n'); 1.360 + } 1.361 +out: 1.362 + if (ferror(fp)) { 1.363 + warn("%s", fname); 1.364 + return (1); 1.365 + } 1.366 + return (0); 1.367 +} 1.368 + 1.369 +static int 1.370 +is_delim(wchar_t ch) 1.371 +{ 1.372 + if (wflag) { 1.373 + if (ch == ' ' || ch == '\t') 1.374 + return 1; 1.375 + } else { 1.376 + if (ch == dchar) 1.377 + return 1; 1.378 + } 1.379 + return 0; 1.380 +} 1.381 + 1.382 +static int 1.383 +f_cut(FILE *fp, const char *fname) 1.384 +{ 1.385 + wchar_t ch; 1.386 + int field, i, isdelim; 1.387 + char *pos, *p; 1.388 + int output; 1.389 + char *lbuf, *mlbuf; 1.390 + size_t clen, lbuflen, reallen; 1.391 + 1.392 + mlbuf = NULL; 1.393 + while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 1.394 + reallen = lbuflen; 1.395 + /* Assert EOL has a newline. */ 1.396 + if (*(lbuf + lbuflen - 1) != '\n') { 1.397 + /* Can't have > 1 line with no trailing newline. 
*/ 1.398 + mlbuf = malloc(lbuflen + 1); 1.399 + if (mlbuf == NULL) 1.400 + err(1, "malloc"); 1.401 + memcpy(mlbuf, lbuf, lbuflen); 1.402 + *(mlbuf + lbuflen) = '\n'; 1.403 + lbuf = mlbuf; 1.404 + reallen++; 1.405 + } 1.406 + output = 0; 1.407 + for (isdelim = 0, p = lbuf;; p += clen) { 1.408 + clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL); 1.409 + if (clen == (size_t)-1 || clen == (size_t)-2) { 1.410 + warnc(EILSEQ, "%s", fname); 1.411 + free(mlbuf); 1.412 + return (1); 1.413 + } 1.414 + if (clen == 0) 1.415 + clen = 1; 1.416 + /* this should work if newline is delimiter */ 1.417 + if (is_delim(ch)) 1.418 + isdelim = 1; 1.419 + if (ch == '\n') { 1.420 + if (!isdelim && !sflag) 1.421 + (void)fwrite(lbuf, lbuflen, 1, stdout); 1.422 + break; 1.423 + } 1.424 + } 1.425 + if (!isdelim) 1.426 + continue; 1.427 + 1.428 + pos = positions + 1; 1.429 + for (field = maxval, p = lbuf; field; --field, ++pos) { 1.430 + if (*pos && output++) 1.431 + for (i = 0; dcharmb[i] != '\0'; i++) 1.432 + putchar(dcharmb[i]); 1.433 + for (;;) { 1.434 + clen = mbrtowc(&ch, p, lbuf + reallen - p, 1.435 + NULL); 1.436 + if (clen == (size_t)-1 || clen == (size_t)-2) { 1.437 + warnc(EILSEQ, "%s", fname); 1.438 + free(mlbuf); 1.439 + return (1); 1.440 + } 1.441 + if (clen == 0) 1.442 + clen = 1; 1.443 + p += clen; 1.444 + if (ch == '\n' || is_delim(ch)) { 1.445 + /* compress whitespace */ 1.446 + if (wflag && ch != '\n') 1.447 + while (is_delim(*p)) 1.448 + p++; 1.449 + break; 1.450 + } 1.451 + if (*pos) 1.452 + for (i = 0; i < (int)clen; i++) 1.453 + putchar(p[i - clen]); 1.454 + } 1.455 + if (ch == '\n') 1.456 + break; 1.457 + } 1.458 + if (ch != '\n') { 1.459 + if (autostop) { 1.460 + if (output) 1.461 + for (i = 0; dcharmb[i] != '\0'; i++) 1.462 + putchar(dcharmb[i]); 1.463 + for (; (ch = *p) != '\n'; ++p) 1.464 + (void)putchar(ch); 1.465 + } else 1.466 + for (; (ch = *p) != '\n'; ++p); 1.467 + } 1.468 + (void)putchar('\n'); 1.469 + } 1.470 + free(mlbuf); 1.471 + return (0); 1.472 +} 1.473 + 
/*
 * Write the three-line synopsis to standard error and terminate the
 * process with exit status 1.  This function does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: cut -b list [-n] [file ...]",
		" cut -c list [file ...]",
		" cut -f list [-s] [-w | -d delim] [file ...]",
	};
	size_t k;

	/* One synopsis line per output line, in table order. */
	for (k = 0; k < sizeof(synopsis) / sizeof(synopsis[0]); k++)
		(void)fprintf(stderr, "%s\n", synopsis[k]);
	exit(1);
}