rev |
line source |
meillo@14
|
1 /*
|
meillo@14
|
2 * cut - cut out fields of lines of files
|
meillo@14
|
3 *
|
meillo@14
|
4 * Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
|
meillo@14
|
5 */
|
meillo@14
|
6 /*
|
meillo@14
|
7 * Copyright (c) 2003 Gunnar Ritter
|
meillo@14
|
8 *
|
meillo@14
|
9 * This software is provided 'as-is', without any express or implied
|
meillo@14
|
10 * warranty. In no event will the authors be held liable for any damages
|
meillo@14
|
11 * arising from the use of this software.
|
meillo@14
|
12 *
|
meillo@14
|
13 * Permission is granted to anyone to use this software for any purpose,
|
meillo@14
|
14 * including commercial applications, and to alter it and redistribute
|
meillo@14
|
15 * it freely, subject to the following restrictions:
|
meillo@14
|
16 *
|
meillo@14
|
17 * 1. The origin of this software must not be misrepresented; you must not
|
meillo@14
|
18 * claim that you wrote the original software. If you use this software
|
meillo@14
|
19 * in a product, an acknowledgment in the product documentation would be
|
meillo@14
|
20 * appreciated but is not required.
|
meillo@14
|
21 *
|
meillo@14
|
22 * 2. Altered source versions must be plainly marked as such, and must not be
|
meillo@14
|
23 * misrepresented as being the original software.
|
meillo@14
|
24 *
|
meillo@14
|
25 * 3. This notice may not be removed or altered from any source distribution.
|
meillo@14
|
26 */
|
meillo@14
|
27
|
meillo@14
|
/*
 * USED tags the version string below, which nothing in the program
 * references, with a compiler attribute where one is available.
 */
#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#  define USED	__attribute__ ((used))
#elif defined (__GNUC__)
#  define USED	__attribute__ ((unused))
#else
#  define USED
#endif

/* Version identification string embedded in the object file. */
static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05";
|
meillo@14
|
36
|
meillo@14
|
37 #include <sys/types.h>
|
meillo@14
|
38 #include <sys/stat.h>
|
meillo@14
|
39 #include <fcntl.h>
|
meillo@14
|
40 #include <unistd.h>
|
meillo@14
|
41 #include <stdio.h>
|
meillo@14
|
42 #include <string.h>
|
meillo@14
|
43 #include <stdlib.h>
|
meillo@14
|
44 #include <errno.h>
|
meillo@14
|
45 #include <libgen.h>
|
meillo@14
|
46 #include <limits.h>
|
meillo@14
|
47 #include <wchar.h>
|
meillo@14
|
48 #include <ctype.h>
|
meillo@14
|
49 #include <locale.h>
|
meillo@14
|
50
|
meillo@14
|
51 #include "iblok.h"
|
meillo@14
|
52
|
meillo@14
|
/*
 * When glibc exposes _IO_putc_unlocked, redirect putc() to it
 * (presumably to skip per-call stream locking in the output loops).
 */
#if defined (__GLIBC__) && defined (_IO_putc_unlocked)
#  undef putc
#  define putc(c, f)	_IO_putc_unlocked(c, f)
#endif
|
meillo@14
|
57
|
meillo@14
|
/*
 * One inclusive interval [r_min, r_max] of selected positions.
 * Intervals are chained into a singly linked list.
 */
struct range {
	struct range	*r_nxt;		/* next interval, NULL at the tail */
	long		r_min;		/* first selected position */
	long		r_max;		/* last selected position */
};
|
meillo@14
|
63
|
meillo@14
|
/* Program-wide state, filled in by main() while parsing options. */
static unsigned		errcnt;			/* accumulated failure status */
static int		method;			/* selection mode: 'b', 'c' or 'f' */
static int		nflag;			/* -n: keep characters whole in byte mode */
static int		sflag;			/* -s: drop lines without a delimiter */
static char		*progname;		/* basename of argv[0] */
static wchar_t		wcdelim = '\t';		/* field delimiter, decoded */
static const char	*mbdelim = "\t";	/* field delimiter, as bytes */
/* NOTE(review): unlike its neighbours this object has external linkage. */
struct range		*fields;		/* head of the selected-range list */
static int		multibyte;		/* LC_CTYPE uses a multibyte encoding */
|
meillo@14
|
73
|
meillo@14
|
/*
 * Decode the character at the start of string s into wc and yield its
 * length in bytes.  In a multibyte locale this is mbtowc()'s result
 * (negative for an invalid sequence); otherwise the byte itself is
 * stored and the result is 1, or 0 for the terminating NUL.
 * Both arguments are evaluated more than once.
 */
#define next(wc, s)	\
	(multibyte ? mbtowc(&(wc), (s), MB_LEN_MAX) \
		   : ((wc) = *(s) & 0377, (wc) != 0))
|
meillo@14
|
76
|
meillo@14
|
/*
 * realloc() wrapper that never returns NULL: if the allocation fails,
 * a fixed message is written to file descriptor 2 and the process
 * exits with status 076.
 */
void *
lrealloc(void *vp, size_t nbytes)
{
	static const char msg[] = "line too long\n";
	void *grown = realloc(vp, nbytes);

	if (grown == NULL) {
		write(2, msg, sizeof msg - 1);
		exit(076);
	}
	return grown;
}
|
meillo@14
|
88
|
meillo@14
|
/*
 * malloc() wrapper that never returns NULL: if the allocation fails,
 * "no memory" is written to file descriptor 2 and the process exits
 * with status 077.
 */
void *
smalloc(size_t nbytes)
{
	static const char msg[] = "no memory\n";
	void *p;

	if ((p = malloc(nbytes)) == NULL) {
		/* sizeof - 1: emit the text only, not its terminating NUL */
		write(2, msg, sizeof msg - 1);
		exit(077);
	}
	return p;
}
|
meillo@14
|
100
|
meillo@14
|
101 static void
|
meillo@14
|
102 error(const char *s)
|
meillo@14
|
103 {
|
meillo@14
|
104 fprintf(stderr, "%s: ERROR: %s\n", progname, s);
|
meillo@14
|
105 exit(2);
|
meillo@14
|
106 }
|
meillo@14
|
107
|
meillo@14
|
/*
 * Print the command synopsis through error(), which ends the program.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...";

	error(synopsis);
}
|
meillo@14
|
113
|
meillo@14
|
114 static void
|
meillo@14
|
115 badlist(void)
|
meillo@14
|
116 {
|
meillo@14
|
117 error(method == 'b' ? "bad list for b/c/f option" :
|
meillo@14
|
118 "bad list for c/f option");
|
meillo@14
|
119 }
|
meillo@14
|
120
|
meillo@14
|
121 static void
|
meillo@14
|
122 setdelim(const char *s)
|
meillo@14
|
123 {
|
meillo@14
|
124 int n;
|
meillo@14
|
125
|
meillo@14
|
126 if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0'))
|
meillo@14
|
127 error("no delimiter");
|
meillo@14
|
128 mbdelim = s;
|
meillo@14
|
129 }
|
meillo@14
|
130
|
meillo@14
|
131 static void
|
meillo@14
|
132 addrange(long m, long n)
|
meillo@14
|
133 {
|
meillo@14
|
134 struct range *rp, *rq;
|
meillo@14
|
135
|
meillo@14
|
136 rp = smalloc(sizeof *rp);
|
meillo@14
|
137 rp->r_nxt = NULL;
|
meillo@14
|
138 rp->r_min = m;
|
meillo@14
|
139 rp->r_max = n ? n : m;
|
meillo@14
|
140 if (fields) {
|
meillo@14
|
141 for (rq = fields; rq->r_nxt; rq = rq->r_nxt);
|
meillo@14
|
142 rq->r_nxt = rp;
|
meillo@14
|
143 } else
|
meillo@14
|
144 fields = rp;
|
meillo@14
|
145 }
|
meillo@14
|
146
|
meillo@14
|
147 static int
|
meillo@14
|
148 have(long i)
|
meillo@14
|
149 {
|
meillo@14
|
150 struct range *rp;
|
meillo@14
|
151
|
meillo@14
|
152 for (rp = fields; rp; rp = rp->r_nxt)
|
meillo@14
|
153 if (i >= rp->r_min && i <= rp->r_max)
|
meillo@14
|
154 return 1;
|
meillo@14
|
155 return 0;
|
meillo@14
|
156 }
|
meillo@14
|
157
|
meillo@14
|
158 #define mnreset() m = 0, n = 0, lp = &m
|
meillo@14
|
159
|
meillo@14
|
160 static void
|
meillo@14
|
161 setlist(const char *s)
|
meillo@14
|
162 {
|
meillo@14
|
163 char *cbuf, *cp;
|
meillo@14
|
164 long m, n;
|
meillo@14
|
165 long *lp;
|
meillo@14
|
166
|
meillo@14
|
167 fields = NULL;
|
meillo@14
|
168 cbuf = smalloc(strlen(s) + 1);
|
meillo@14
|
169 mnreset();
|
meillo@14
|
170 for (;;) {
|
meillo@14
|
171 if (*s == '-') {
|
meillo@14
|
172 if (m == 0)
|
meillo@14
|
173 m = 1;
|
meillo@14
|
174 n = LONG_MAX;
|
meillo@14
|
175 lp = &n;
|
meillo@14
|
176 s++;
|
meillo@14
|
177 } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') {
|
meillo@14
|
178 if (m)
|
meillo@14
|
179 addrange(m, n);
|
meillo@14
|
180 mnreset();
|
meillo@14
|
181 if (*s == '\0')
|
meillo@14
|
182 break;
|
meillo@14
|
183 s++;
|
meillo@14
|
184 } else if (isdigit(*s & 0377)) {
|
meillo@14
|
185 cp = cbuf;
|
meillo@14
|
186 do
|
meillo@14
|
187 *cp++ = *s++;
|
meillo@14
|
188 while (isdigit(*s & 0377));
|
meillo@14
|
189 *cp = '\0';
|
meillo@14
|
190 *lp = strtol(cbuf, NULL, 10);
|
meillo@14
|
191 } else
|
meillo@14
|
192 badlist();
|
meillo@14
|
193 }
|
meillo@14
|
194 if (fields == NULL)
|
meillo@14
|
195 error("no fields");
|
meillo@14
|
196 free(cbuf);
|
meillo@14
|
197 }
|
meillo@14
|
198
|
meillo@14
|
/*
 * Byte-wise selection: copy to stdout every byte whose 1-based position
 * within its line is in the range list.  Newlines are always copied and
 * restart the position count.
 */
static void
cutb(struct iblok *ip)
{
	long pos;	/* long, like the range bounds that have() compares against */
	int c;

	pos = 1;
	while ((c = ib_get(ip)) != EOF) {
		if (c == '\n') {
			pos = 1;
			putc(c, stdout);
		} else if (have(pos++))
			putc(c, stdout);
	}
}
|
meillo@14
|
213
|
meillo@14
|
/*
 * Byte selection that keeps characters whole (-b with -n in a multibyte
 * locale).  Positions are counted in bytes, but all n bytes of a
 * character are written exactly when the position of its last byte is
 * in the range list.  Newlines are always copied and restart the count.
 */
static void
cutbn(struct iblok *ip)
{
	char *cp;
	long i;		/* byte position; long to match the range bounds */
	int m, n;
	wint_t wc;

	i = 1;
	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
		if (wc == '\n') {
			i = 1;
			putc('\n', stdout);
		} else {
			if (have(i + n - 1))
				for (m = 0; m < n; m++)
					putc(cp[m], stdout);
			i += n;
		}
	}
}
|
meillo@14
|
234
|
meillo@14
|
/*
 * Character-wise selection for multibyte locales: write the bytes of
 * every character whose 1-based character position within its line is
 * in the range list.  Input for which ib_getw() reports WEOF is neither
 * counted nor written.  Newlines are always copied and restart the
 * count.
 */
static void
cutc(struct iblok *ip)
{
	char *cp;
	long i;		/* character position; long to match the range bounds */
	int n, m;
	wint_t wc;

	i = 1;
	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
		if (wc == '\n') {
			i = 1;
			putc('\n', stdout);
		} else if (wc != WEOF && have(i++)) {
			for (m = 0; m < n; m++)
				putc(cp[m], stdout);
		}
	}
}
|
meillo@14
|
253
|
meillo@14
|
254 static void
|
meillo@14
|
255 cutf(struct iblok *ip)
|
meillo@14
|
256 {
|
meillo@14
|
257 static char *line;
|
meillo@14
|
258 static size_t linesize;
|
meillo@14
|
259 char *cp, *lp, *lq;
|
meillo@14
|
260 int c, i, n, m, gotcha;
|
meillo@14
|
261 char b;
|
meillo@14
|
262 wint_t wc;
|
meillo@14
|
263 const int incr = 128;
|
meillo@14
|
264
|
meillo@14
|
265 if (linesize == 0)
|
meillo@14
|
266 line = smalloc(linesize = incr);
|
meillo@14
|
267 lp = line;
|
meillo@14
|
268 gotcha = 0;
|
meillo@14
|
269 i = 1;
|
meillo@14
|
270 do {
|
meillo@14
|
271 if (multibyte)
|
meillo@14
|
272 cp = ib_getw(ip, &wc, &n);
|
meillo@14
|
273 else {
|
meillo@14
|
274 if ((c = ib_get(ip)) != EOF) {
|
meillo@14
|
275 wc = c;
|
meillo@14
|
276 b = (char)c;
|
meillo@14
|
277 cp = &b;
|
meillo@14
|
278 } else {
|
meillo@14
|
279 wc = WEOF;
|
meillo@14
|
280 cp = NULL;
|
meillo@14
|
281 }
|
meillo@14
|
282 n = 1;
|
meillo@14
|
283 }
|
meillo@14
|
284 if (cp == NULL || wc == '\n' || wc == wcdelim) {
|
meillo@14
|
285 if (have(i) && (!sflag || gotcha || wc == wcdelim) ||
|
meillo@14
|
286 (!sflag && i == 1 &&
|
meillo@14
|
287 (cp == NULL || wc == '\n'))) {
|
meillo@14
|
288 if (gotcha)
|
meillo@14
|
289 for (m = 0; mbdelim[m]; m++)
|
meillo@14
|
290 putc(mbdelim[m], stdout);
|
meillo@14
|
291 for (lq = line; lq < lp; lq++)
|
meillo@14
|
292 putc(*lq, stdout);
|
meillo@14
|
293 gotcha = 1;
|
meillo@14
|
294 }
|
meillo@14
|
295 if (wc == '\n') {
|
meillo@14
|
296 if (gotcha)
|
meillo@14
|
297 putc('\n', stdout);
|
meillo@14
|
298 i = 1;
|
meillo@14
|
299 gotcha = 0;
|
meillo@14
|
300 } else
|
meillo@14
|
301 i++;
|
meillo@14
|
302 lp = line;
|
meillo@14
|
303 } else {
|
meillo@14
|
304 for (m = 0; m < n; m++) {
|
meillo@14
|
305 if (lp >= &line[linesize]) {
|
meillo@14
|
306 size_t diff = lp - line;
|
meillo@14
|
307 line = lrealloc(line, linesize += incr);
|
meillo@14
|
308 lp = &line[diff];
|
meillo@14
|
309 }
|
meillo@14
|
310 *lp++ = cp[m];
|
meillo@14
|
311 }
|
meillo@14
|
312 }
|
meillo@14
|
313 } while (cp != NULL);
|
meillo@14
|
314 }
|
meillo@14
|
315
|
meillo@14
|
316 static int
|
meillo@14
|
317 fdcut(int fd)
|
meillo@14
|
318 {
|
meillo@14
|
319 struct iblok *ip;
|
meillo@14
|
320
|
meillo@14
|
321 ip = ib_alloc(fd, 0);
|
meillo@14
|
322 switch (method) {
|
meillo@14
|
323 case 'b':
|
meillo@14
|
324 if (nflag && multibyte)
|
meillo@14
|
325 cutbn(ip);
|
meillo@14
|
326 else
|
meillo@14
|
327 cutb(ip);
|
meillo@14
|
328 break;
|
meillo@14
|
329 case 'c':
|
meillo@14
|
330 if (multibyte)
|
meillo@14
|
331 cutc(ip);
|
meillo@14
|
332 else
|
meillo@14
|
333 cutb(ip);
|
meillo@14
|
334 break;
|
meillo@14
|
335 case 'f':
|
meillo@14
|
336 cutf(ip);
|
meillo@14
|
337 break;
|
meillo@14
|
338 }
|
meillo@14
|
339 ib_free(ip);
|
meillo@14
|
340 return 0;
|
meillo@14
|
341 }
|
meillo@14
|
342
|
meillo@14
|
343 static int
|
meillo@14
|
344 fncut(const char *fn)
|
meillo@14
|
345 {
|
meillo@14
|
346 int fd, res;
|
meillo@14
|
347
|
meillo@14
|
348 if (fn[0] == '-' && fn[1] == '\0')
|
meillo@14
|
349 fd = 0;
|
meillo@14
|
350 else if ((fd = open(fn, O_RDONLY)) < 0) {
|
meillo@14
|
351 fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn);
|
meillo@14
|
352 return 1;
|
meillo@14
|
353 }
|
meillo@14
|
354 res = fdcut(fd);
|
meillo@14
|
355 if (fd)
|
meillo@14
|
356 close(fd);
|
meillo@14
|
357 return res;
|
meillo@14
|
358 }
|
meillo@14
|
359
|
meillo@14
|
360 int
|
meillo@14
|
361 main(int argc, char **argv)
|
meillo@14
|
362 {
|
meillo@14
|
363 const char optstring[] = "b:c:d:f:ns";
|
meillo@14
|
364 int i;
|
meillo@14
|
365
|
meillo@14
|
366 progname = basename(argv[0]);
|
meillo@14
|
367 setlocale(LC_CTYPE, "");
|
meillo@14
|
368 multibyte = MB_CUR_MAX > 1;
|
meillo@14
|
369 #ifdef __GLIBC__
|
meillo@14
|
370 putenv("POSIXLY_CORRECT=1");
|
meillo@14
|
371 #endif
|
meillo@14
|
372 while ((i = getopt(argc, argv, optstring)) != EOF) {
|
meillo@14
|
373 switch (i) {
|
meillo@14
|
374 case 'b':
|
meillo@14
|
375 case 'c':
|
meillo@14
|
376 case 'f':
|
meillo@14
|
377 if (method && method != i)
|
meillo@14
|
378 usage();
|
meillo@14
|
379 method = i;
|
meillo@14
|
380 setlist(optarg);
|
meillo@14
|
381 break;
|
meillo@14
|
382 case 'd':
|
meillo@14
|
383 setdelim(optarg);
|
meillo@14
|
384 break;
|
meillo@14
|
385 case 'n':
|
meillo@14
|
386 nflag = 1;
|
meillo@14
|
387 break;
|
meillo@14
|
388 case 's':
|
meillo@14
|
389 sflag = 1;
|
meillo@14
|
390 break;
|
meillo@14
|
391 default:
|
meillo@14
|
392 usage();
|
meillo@14
|
393 }
|
meillo@14
|
394 }
|
meillo@14
|
395 /*if ((sflag && method != 'f') || (nflag && method != 'b'))
|
meillo@14
|
396 usage();*/
|
meillo@14
|
397 if (method == 0)
|
meillo@14
|
398 badlist();
|
meillo@14
|
399 if (argv[optind]) {
|
meillo@14
|
400 for (i = optind; argv[i]; i++)
|
meillo@14
|
401 errcnt |= fncut(argv[i]);
|
meillo@14
|
402 } else
|
meillo@14
|
403 errcnt |= fdcut(0);
|
meillo@14
|
404 return errcnt;
|
meillo@14
|
405 }
|