docs/cut
diff code/cut.c__heirloom.2012-05-20 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/code/cut.c__heirloom.2012-05-20 Tue May 12 06:46:59 2015 +0200 1.3 @@ -0,0 +1,405 @@ 1.4 +/* 1.5 + * cut - cut out fields of lines of files 1.6 + * 1.7 + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. 1.8 + */ 1.9 +/* 1.10 + * Copyright (c) 2003 Gunnar Ritter 1.11 + * 1.12 + * This software is provided 'as-is', without any express or implied 1.13 + * warranty. In no event will the authors be held liable for any damages 1.14 + * arising from the use of this software. 1.15 + * 1.16 + * Permission is granted to anyone to use this software for any purpose, 1.17 + * including commercial applications, and to alter it and redistribute 1.18 + * it freely, subject to the following restrictions: 1.19 + * 1.20 + * 1. The origin of this software must not be misrepresented; you must not 1.21 + * claim that you wrote the original software. If you use this software 1.22 + * in a product, an acknowledgment in the product documentation would be 1.23 + * appreciated but is not required. 1.24 + * 1.25 + * 2. Altered source versions must be plainly marked as such, and must not be 1.26 + * misrepresented as being the original software. 1.27 + * 1.28 + * 3. This notice may not be removed or altered from any source distribution. 1.29 + */ 1.30 + 1.31 +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 1.32 +#define USED __attribute__ ((used)) 1.33 +#elif defined __GNUC__ 1.34 +#define USED __attribute__ ((unused)) 1.35 +#else 1.36 +#define USED 1.37 +#endif 1.38 +static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05"; 1.39 + 1.40 +#include <sys/types.h> 1.41 +#include <sys/stat.h> 1.42 +#include <fcntl.h> 1.43 +#include <unistd.h> 1.44 +#include <stdio.h> 1.45 +#include <string.h> 1.46 +#include <stdlib.h> 1.47 +#include <errno.h> 1.48 +#include <libgen.h> 1.49 +#include <limits.h> 1.50 +#include <wchar.h> 1.51 +#include <ctype.h> 1.52 +#include <locale.h> 1.53 + 1.54 +#include "iblok.h" 1.55 + 1.56 +#if defined (__GLIBC__) && defined (_IO_putc_unlocked) 1.57 +#undef putc 1.58 +#define putc(c, f) _IO_putc_unlocked(c, f) 1.59 +#endif 1.60 + 1.61 +struct range { 1.62 + struct range *r_nxt; 1.63 + long r_min; 1.64 + long r_max; 1.65 +}; 1.66 + 1.67 +static unsigned errcnt; /* count of errors */ 1.68 +static int method; /* one of b, c, f */ 1.69 +static int nflag; /* character boundary bytes */ 1.70 +static int sflag; /* suppress lines w/o delimiters */ 1.71 +static char *progname; /* argv[0] to main() */ 1.72 +static wchar_t wcdelim = '\t'; /* delimiter character */ 1.73 +static const char *mbdelim = "\t";/* delimiter character */ 1.74 +struct range *fields; /* range list */ 1.75 +static int multibyte; /* multibyte LC_CTYPE */ 1.76 + 1.77 +#define next(wc, s) (multibyte ? mbtowc(&(wc), s, MB_LEN_MAX) :\ 1.78 + ((wc) = *(s) & 0377, (wc) != 0)) 1.79 + 1.80 +void * 1.81 +lrealloc(void *vp, size_t nbytes) 1.82 +{ 1.83 + void *p; 1.84 + 1.85 + if ((p = realloc(vp, nbytes)) == NULL) { 1.86 + write(2, "line too long\n", 14); 1.87 + exit(076); 1.88 + } 1.89 + return p; 1.90 +} 1.91 + 1.92 +void * 1.93 +smalloc(size_t nbytes) 1.94 +{ 1.95 + void *p; 1.96 + 1.97 + if ((p = malloc(nbytes)) == NULL) { 1.98 + write(2, "no memory\n", 11); 1.99 + exit(077); 1.100 + } 1.101 + return p; 1.102 +} 1.103 + 1.104 +static void 1.105 +error(const char *s) 1.106 +{ 1.107 + fprintf(stderr, "%s: ERROR: %s\n", progname, s); 1.108 + exit(2); 1.109 +} 1.110 + 1.111 +static void 1.112 +usage(void) 1.113 +{ 1.114 + error("Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ..."); 1.115 +} 1.116 + 1.117 +static void 1.118 +badlist(void) 1.119 +{ 1.120 + error(method == 'b' ? "bad list for b/c/f option" : 1.121 + "bad list for c/f option"); 1.122 +} 1.123 + 1.124 +static void 1.125 +setdelim(const char *s) 1.126 +{ 1.127 + int n; 1.128 + 1.129 + if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0')) 1.130 + error("no delimiter"); 1.131 + mbdelim = s; 1.132 +} 1.133 + 1.134 +static void 1.135 +addrange(long m, long n) 1.136 +{ 1.137 + struct range *rp, *rq; 1.138 + 1.139 + rp = smalloc(sizeof *rp); 1.140 + rp->r_nxt = NULL; 1.141 + rp->r_min = m; 1.142 + rp->r_max = n ? n : m; 1.143 + if (fields) { 1.144 + for (rq = fields; rq->r_nxt; rq = rq->r_nxt); 1.145 + rq->r_nxt = rp; 1.146 + } else 1.147 + fields = rp; 1.148 +} 1.149 + 1.150 +static int 1.151 +have(long i) 1.152 +{ 1.153 + struct range *rp; 1.154 + 1.155 + for (rp = fields; rp; rp = rp->r_nxt) 1.156 + if (i >= rp->r_min && i <= rp->r_max) 1.157 + return 1; 1.158 + return 0; 1.159 +} 1.160 + 1.161 +#define mnreset() m = 0, n = 0, lp = &m 1.162 + 1.163 +static void 1.164 +setlist(const char *s) 1.165 +{ 1.166 + char *cbuf, *cp; 1.167 + long m, n; 1.168 + long *lp; 1.169 + 1.170 + fields = NULL; 1.171 + cbuf = smalloc(strlen(s) + 1); 1.172 + mnreset(); 1.173 + for (;;) { 1.174 + if (*s == '-') { 1.175 + if (m == 0) 1.176 + m = 1; 1.177 + n = LONG_MAX; 1.178 + lp = &n; 1.179 + s++; 1.180 + } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') { 1.181 + if (m) 1.182 + addrange(m, n); 1.183 + mnreset(); 1.184 + if (*s == '\0') 1.185 + break; 1.186 + s++; 1.187 + } else if (isdigit(*s & 0377)) { 1.188 + cp = cbuf; 1.189 + do 1.190 + *cp++ = *s++; 1.191 + while (isdigit(*s & 0377)); 1.192 + *cp = '\0'; 1.193 + *lp = strtol(cbuf, NULL, 10); 1.194 + } else 1.195 + badlist(); 1.196 + } 1.197 + if (fields == NULL) 1.198 + error("no fields"); 1.199 + free(cbuf); 1.200 +} 1.201 + 1.202 +static void 1.203 +cutb(struct iblok *ip) 1.204 +{ 1.205 + int c, i; 1.206 + 1.207 + i = 1; 1.208 + while ((c = ib_get(ip)) != EOF) { 1.209 + if (c == '\n') { 1.210 + i = 1; 1.211 + putc(c, stdout); 1.212 + } else if (have(i++)) 1.213 + putc(c, stdout); 1.214 + } 1.215 +} 1.216 + 1.217 +static void 1.218 +cutbn(struct iblok *ip) 1.219 +{ 1.220 + char *cp; 1.221 + int i, m, n; 1.222 + wint_t wc; 1.223 + 1.224 + i = 1; 1.225 + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { 1.226 + if (wc == '\n') { 1.227 + i = 1; 1.228 + putc('\n', stdout); 1.229 + } else { 1.230 + if (have(i + n - 1)) 1.231 + for (m = 0; m < n; m++) 1.232 + putc(cp[m], stdout); 1.233 + i += n; 1.234 + } 1.235 + } 1.236 +} 1.237 + 1.238 +static void 1.239 +cutc(struct iblok *ip) 1.240 +{ 1.241 + char *cp; 1.242 + int i, n, m; 1.243 + wint_t wc; 1.244 + 1.245 + i = 1; 1.246 + while ((cp = ib_getw(ip, &wc, &n)) != NULL) { 1.247 + if (wc == '\n') { 1.248 + i = 1; 1.249 + putc('\n', stdout); 1.250 + } else if (wc != WEOF && have(i++)) { 1.251 + for (m = 0; m < n; m++) 1.252 + putc(cp[m], stdout); 1.253 + } 1.254 + } 1.255 +} 1.256 + 1.257 +static void 1.258 +cutf(struct iblok *ip) 1.259 +{ 1.260 + static char *line; 1.261 + static size_t linesize; 1.262 + char *cp, *lp, *lq; 1.263 + int c, i, n, m, gotcha; 1.264 + char b; 1.265 + wint_t wc; 1.266 + const int incr = 128; 1.267 + 1.268 + if (linesize == 0) 1.269 + line = smalloc(linesize = incr); 1.270 + lp = line; 1.271 + gotcha = 0; 1.272 + i = 1; 1.273 + do { 1.274 + if (multibyte) 1.275 + cp = ib_getw(ip, &wc, &n); 1.276 + else { 1.277 + if ((c = ib_get(ip)) != EOF) { 1.278 + wc = c; 1.279 + b = (char)c; 1.280 + cp = &b; 1.281 + } else { 1.282 + wc = WEOF; 1.283 + cp = NULL; 1.284 + } 1.285 + n = 1; 1.286 + } 1.287 + if (cp == NULL || wc == '\n' || wc == wcdelim) { 1.288 + if (have(i) && (!sflag || gotcha || wc == wcdelim) || 1.289 + (!sflag && i == 1 && 1.290 + (cp == NULL || wc == '\n'))) { 1.291 + if (gotcha) 1.292 + for (m = 0; mbdelim[m]; m++) 1.293 + putc(mbdelim[m], stdout); 1.294 + for (lq = line; lq < lp; lq++) 1.295 + putc(*lq, stdout); 1.296 + gotcha = 1; 1.297 + } 1.298 + if (wc == '\n') { 1.299 + if (gotcha) 1.300 + putc('\n', stdout); 1.301 + i = 1; 1.302 + gotcha = 0; 1.303 + } else 1.304 + i++; 1.305 + lp = line; 1.306 + } else { 1.307 + for (m = 0; m < n; m++) { 1.308 + if (lp >= &line[linesize]) { 1.309 + size_t diff = lp - line; 1.310 + line = lrealloc(line, linesize += incr); 1.311 + lp = &line[diff]; 1.312 + } 1.313 + *lp++ = cp[m]; 1.314 + } 1.315 + } 1.316 + } while (cp != NULL); 1.317 +} 1.318 + 1.319 +static int 1.320 +fdcut(int fd) 1.321 +{ 1.322 + struct iblok *ip; 1.323 + 1.324 + ip = ib_alloc(fd, 0); 1.325 + switch (method) { 1.326 + case 'b': 1.327 + if (nflag && multibyte) 1.328 + cutbn(ip); 1.329 + else 1.330 + cutb(ip); 1.331 + break; 1.332 + case 'c': 1.333 + if (multibyte) 1.334 + cutc(ip); 1.335 + else 1.336 + cutb(ip); 1.337 + break; 1.338 + case 'f': 1.339 + cutf(ip); 1.340 + break; 1.341 + } 1.342 + ib_free(ip); 1.343 + return 0; 1.344 +} 1.345 + 1.346 +static int 1.347 +fncut(const char *fn) 1.348 +{ 1.349 + int fd, res; 1.350 + 1.351 + if (fn[0] == '-' && fn[1] == '\0') 1.352 + fd = 0; 1.353 + else if ((fd = open(fn, O_RDONLY)) < 0) { 1.354 + fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn); 1.355 + return 1; 1.356 + } 1.357 + res = fdcut(fd); 1.358 + if (fd) 1.359 + close(fd); 1.360 + return res; 1.361 +} 1.362 + 1.363 +int 1.364 +main(int argc, char **argv) 1.365 +{ 1.366 + const char optstring[] = "b:c:d:f:ns"; 1.367 + int i; 1.368 + 1.369 + progname = basename(argv[0]); 1.370 + setlocale(LC_CTYPE, ""); 1.371 + multibyte = MB_CUR_MAX > 1; 1.372 +#ifdef __GLIBC__ 1.373 + putenv("POSIXLY_CORRECT=1"); 1.374 +#endif 1.375 + while ((i = getopt(argc, argv, optstring)) != EOF) { 1.376 + switch (i) { 1.377 + case 'b': 1.378 + case 'c': 1.379 + case 'f': 1.380 + if (method && method != i) 1.381 + usage(); 1.382 + method = i; 1.383 + setlist(optarg); 1.384 + break; 1.385 + case 'd': 1.386 + setdelim(optarg); 1.387 + break; 1.388 + case 'n': 1.389 + nflag = 1; 1.390 + break; 1.391 + case 's': 1.392 + sflag = 1; 1.393 + break; 1.394 + default: 1.395 + usage(); 1.396 + } 1.397 + } 1.398 + /*if ((sflag && method != 'f') || (nflag && method != 'b')) 1.399 + usage();*/ 1.400 + if (method == 0) 1.401 + badlist(); 1.402 + if (argv[optind]) { 1.403 + for (i = optind; argv[i]; i++) 1.404 + errcnt |= fncut(argv[i]); 1.405 + } else 1.406 + errcnt |= fdcut(0); 1.407 + return errcnt; 1.408 +}