docs/cut
view code/cut.c__heirloom.2012-05-20 @ 31:106609b64dc4
minor corrections and improvements in the text
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 15 Sep 2015 17:20:20 +0200 |
parents | |
children |
line source
1 /*
2 * cut - cut out fields of lines of files
3 *
4 * Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
5 */
6 /*
7 * Copyright (c) 2003 Gunnar Ritter
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must not
18 * claim that you wrote the original software. If you use this software
19 * in a product, an acknowledgment in the product documentation would be
20 * appreciated but is not required.
21 *
22 * 2. Altered source versions must be plainly marked as such, and must not be
23 * misrepresented as being the original software.
24 *
25 * 3. This notice may not be removed or altered from any source distribution.
26 */
/*
 * USED is attached to the sccsid string below, which nothing in this
 * file references.  GCC 3.4 and later (and any GCC 4 or newer) get the
 * `used' attribute, older GCC versions get `unused', and every other
 * compiler gets an empty definition.
 */
#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define USED __attribute__ ((used))
#elif defined __GNUC__
#define USED __attribute__ ((unused))
#else
#define USED
#endif
/* Version identification string (presumably meant for what(1), given the "@(#)" prefix). */
static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05";
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <fcntl.h>
40 #include <unistd.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <errno.h>
45 #include <libgen.h>
46 #include <limits.h>
47 #include <wchar.h>
48 #include <ctype.h>
49 #include <locale.h>
51 #include "iblok.h"
/*
 * With glibc, when _IO_putc_unlocked is defined, every putc() in this
 * file is redirected to it (presumably to avoid per-call stream locking
 * on the character-at-a-time output paths -- confirm).
 */
#if defined (__GLIBC__) && defined (_IO_putc_unlocked)
#undef putc
#define putc(c, f) _IO_putc_unlocked(c, f)
#endif
/*
 * One element of the selection list built from a -b/-c/-f argument.
 * The list is singly linked; each node describes an inclusive range of
 * 1-based positions, see addrange() and have().
 */
struct range {
	struct range *r_nxt;	/* next node, NULL at the end of the list */
	long r_min;		/* first selected position */
	long r_max;		/* last selected position, LONG_MAX if open-ended */
};
static unsigned errcnt;		/* OR of the per-file results; returned by main() */
static int method;		/* one of 'b', 'c', 'f'; 0 until an option sets it */
static int nflag;		/* -n: with -b in a multibyte locale, cut on character boundaries */
static int sflag;		/* -s: suppress lines w/o delimiters */
static char *progname;		/* basename of argv[0], prefix of diagnostics */
static wchar_t wcdelim = '\t';	/* field delimiter as a wide character, compared against input */
static const char *mbdelim = "\t";/* the same delimiter as a byte string, written to output */
struct range *fields;		/* head of the range list built by setlist() */
static int multibyte;		/* nonzero if MB_CUR_MAX > 1 for the active LC_CTYPE */

/*
 * next(wc, s): decode the first character of the string s into wc.
 * In a multibyte locale the value is whatever mbtowc() returns (the
 * byte length, 0 for a NUL character, negative on an invalid sequence).
 * Otherwise a single byte is taken and the value is 1, or 0 if that
 * byte is NUL.
 */
#define next(wc, s) (multibyte ? mbtowc(&(wc), s, MB_LEN_MAX) :\
		((wc) = *(s) & 0377, (wc) != 0))
/*
 * lrealloc - realloc() that never returns NULL.
 *
 * Resizes the allocation at vp to nbytes bytes and returns the new
 * address.  If realloc() fails, "line too long" is written to file
 * descriptor 2 and the process terminates with exit status 076.
 */
void *
lrealloc(void *vp, size_t nbytes)
{
	static const char msg[] = "line too long\n";
	void *grown = realloc(vp, nbytes);

	if (grown != NULL)
		return grown;
	write(2, msg, sizeof msg - 1);
	exit(076);
}
/*
 * smalloc - malloc() that never returns NULL.
 *
 * Returns a pointer to nbytes bytes of uninitialized memory.  If
 * malloc() fails, "no memory" is written to file descriptor 2 and the
 * process terminates with exit status 077.
 */
void *
smalloc(size_t nbytes)
{
	static const char msg[] = "no memory\n";
	void *p;

	if ((p = malloc(nbytes)) == NULL) {
		/*
		 * The length is derived from the message itself; the
		 * former hard-coded 11 also wrote the terminating NUL.
		 */
		write(2, msg, sizeof msg - 1);
		exit(077);
	}
	return p;
}
101 static void
102 error(const char *s)
103 {
104 fprintf(stderr, "%s: ERROR: %s\n", progname, s);
105 exit(2);
106 }
/*
 * usage - print the synopsis through error(), which terminates the
 * program.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...";

	error(synopsis);
}
114 static void
115 badlist(void)
116 {
117 error(method == 'b' ? "bad list for b/c/f option" :
118 "bad list for c/f option");
119 }
121 static void
122 setdelim(const char *s)
123 {
124 int n;
126 if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0'))
127 error("no delimiter");
128 mbdelim = s;
129 }
131 static void
132 addrange(long m, long n)
133 {
134 struct range *rp, *rq;
136 rp = smalloc(sizeof *rp);
137 rp->r_nxt = NULL;
138 rp->r_min = m;
139 rp->r_max = n ? n : m;
140 if (fields) {
141 for (rq = fields; rq->r_nxt; rq = rq->r_nxt);
142 rq->r_nxt = rp;
143 } else
144 fields = rp;
145 }
147 static int
148 have(long i)
149 {
150 struct range *rp;
152 for (rp = fields; rp; rp = rp->r_nxt)
153 if (i >= rp->r_min && i <= rp->r_max)
154 return 1;
155 return 0;
156 }
158 #define mnreset() m = 0, n = 0, lp = &m
160 static void
161 setlist(const char *s)
162 {
163 char *cbuf, *cp;
164 long m, n;
165 long *lp;
167 fields = NULL;
168 cbuf = smalloc(strlen(s) + 1);
169 mnreset();
170 for (;;) {
171 if (*s == '-') {
172 if (m == 0)
173 m = 1;
174 n = LONG_MAX;
175 lp = &n;
176 s++;
177 } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') {
178 if (m)
179 addrange(m, n);
180 mnreset();
181 if (*s == '\0')
182 break;
183 s++;
184 } else if (isdigit(*s & 0377)) {
185 cp = cbuf;
186 do
187 *cp++ = *s++;
188 while (isdigit(*s & 0377));
189 *cp = '\0';
190 *lp = strtol(cbuf, NULL, 10);
191 } else
192 badlist();
193 }
194 if (fields == NULL)
195 error("no fields");
196 free(cbuf);
197 }
/*
 * cutb - byte-wise selection: copy from ip to standard output every
 * byte whose 1-based position within its line is in the fields list.
 * Newlines are always copied and restart the position count.
 */
static void
cutb(struct iblok *ip)
{
	int pos = 1;
	int c;

	for (;;) {
		c = ib_get(ip);
		if (c == EOF)
			break;
		if (c == '\n') {
			pos = 1;
			putc(c, stdout);
			continue;
		}
		if (have(pos++))
			putc(c, stdout);
	}
}
/*
 * cutbn - byte selection that does not split characters (-b with -n
 * in a multibyte locale).
 *
 * Input is read one character at a time with ib_getw().  A character
 * is written in full when the position of its last byte is selected;
 * otherwise none of it is written.  Positions count bytes, starting at
 * 1 on every line.  Newlines are always written.
 */
static void
cutbn(struct iblok *ip)
{
	char *seq;
	int len, k;
	int pos = 1;
	wint_t wc;

	while ((seq = ib_getw(ip, &wc, &len)) != NULL) {
		if (wc == '\n') {
			putc('\n', stdout);
			pos = 1;
			continue;
		}
		if (have(pos + len - 1)) {
			for (k = 0; k < len; k++)
				putc(seq[k], stdout);
		}
		pos += len;
	}
}
/*
 * cutc - character-wise selection in a multibyte locale.
 *
 * Input is read one character at a time with ib_getw(); every
 * character whose 1-based character position within its line is
 * selected is written with all of its bytes.  A read that yields WEOF
 * is neither written nor counted.  Newlines are always written and
 * restart the position count.
 */
static void
cutc(struct iblok *ip)
{
	char *seq;
	int len, k;
	int pos = 1;
	wint_t wc;

	while ((seq = ib_getw(ip, &wc, &len)) != NULL) {
		if (wc == '\n') {
			putc('\n', stdout);
			pos = 1;
			continue;
		}
		if (wc == WEOF)
			continue;
		if (!have(pos++))
			continue;
		for (k = 0; k < len; k++)
			putc(seq[k], stdout);
	}
}
254 static void
255 cutf(struct iblok *ip)
256 {
257 static char *line;
258 static size_t linesize;
259 char *cp, *lp, *lq;
260 int c, i, n, m, gotcha;
261 char b;
262 wint_t wc;
263 const int incr = 128;
265 if (linesize == 0)
266 line = smalloc(linesize = incr);
267 lp = line;
268 gotcha = 0;
269 i = 1;
270 do {
271 if (multibyte)
272 cp = ib_getw(ip, &wc, &n);
273 else {
274 if ((c = ib_get(ip)) != EOF) {
275 wc = c;
276 b = (char)c;
277 cp = &b;
278 } else {
279 wc = WEOF;
280 cp = NULL;
281 }
282 n = 1;
283 }
284 if (cp == NULL || wc == '\n' || wc == wcdelim) {
285 if (have(i) && (!sflag || gotcha || wc == wcdelim) ||
286 (!sflag && i == 1 &&
287 (cp == NULL || wc == '\n'))) {
288 if (gotcha)
289 for (m = 0; mbdelim[m]; m++)
290 putc(mbdelim[m], stdout);
291 for (lq = line; lq < lp; lq++)
292 putc(*lq, stdout);
293 gotcha = 1;
294 }
295 if (wc == '\n') {
296 if (gotcha)
297 putc('\n', stdout);
298 i = 1;
299 gotcha = 0;
300 } else
301 i++;
302 lp = line;
303 } else {
304 for (m = 0; m < n; m++) {
305 if (lp >= &line[linesize]) {
306 size_t diff = lp - line;
307 line = lrealloc(line, linesize += incr);
308 lp = &line[diff];
309 }
310 *lp++ = cp[m];
311 }
312 }
313 } while (cp != NULL);
314 }
316 static int
317 fdcut(int fd)
318 {
319 struct iblok *ip;
321 ip = ib_alloc(fd, 0);
322 switch (method) {
323 case 'b':
324 if (nflag && multibyte)
325 cutbn(ip);
326 else
327 cutb(ip);
328 break;
329 case 'c':
330 if (multibyte)
331 cutc(ip);
332 else
333 cutb(ip);
334 break;
335 case 'f':
336 cutf(ip);
337 break;
338 }
339 ib_free(ip);
340 return 0;
341 }
343 static int
344 fncut(const char *fn)
345 {
346 int fd, res;
348 if (fn[0] == '-' && fn[1] == '\0')
349 fd = 0;
350 else if ((fd = open(fn, O_RDONLY)) < 0) {
351 fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn);
352 return 1;
353 }
354 res = fdcut(fd);
355 if (fd)
356 close(fd);
357 return res;
358 }
360 int
361 main(int argc, char **argv)
362 {
363 const char optstring[] = "b:c:d:f:ns";
364 int i;
366 progname = basename(argv[0]);
367 setlocale(LC_CTYPE, "");
368 multibyte = MB_CUR_MAX > 1;
369 #ifdef __GLIBC__
370 putenv("POSIXLY_CORRECT=1");
371 #endif
372 while ((i = getopt(argc, argv, optstring)) != EOF) {
373 switch (i) {
374 case 'b':
375 case 'c':
376 case 'f':
377 if (method && method != i)
378 usage();
379 method = i;
380 setlist(optarg);
381 break;
382 case 'd':
383 setdelim(optarg);
384 break;
385 case 'n':
386 nflag = 1;
387 break;
388 case 's':
389 sflag = 1;
390 break;
391 default:
392 usage();
393 }
394 }
395 /*if ((sflag && method != 'f') || (nflag && method != 'b'))
396 usage();*/
397 if (method == 0)
398 badlist();
399 if (argv[optind]) {
400 for (i = optind; argv[i]; i++)
401 errcnt |= fncut(argv[i]);
402 } else
403 errcnt |= fdcut(0);
404 return errcnt;
405 }