Mercurial > docs > cut
comparison code/cut.c__heirloom.2012-05-20 @ 14:21ad1c1548c4
Code ausgewaehlter Implementierungen eingefuegt
Das Datum entspricht dem Dateiaenderungsdatum.
author | markus schnalke <meillo@marmaro.de> |
---|---|
date | Tue, 12 May 2015 06:46:59 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
13:bf5e41260f89 | 14:21ad1c1548c4 |
---|---|
1 /* | |
2 * cut - cut out fields of lines of files | |
3 * | |
4 * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. | |
5 */ | |
6 /* | |
7 * Copyright (c) 2003 Gunnar Ritter | |
8 * | |
9 * This software is provided 'as-is', without any express or implied | |
10 * warranty. In no event will the authors be held liable for any damages | |
11 * arising from the use of this software. | |
12 * | |
13 * Permission is granted to anyone to use this software for any purpose, | |
14 * including commercial applications, and to alter it and redistribute | |
15 * it freely, subject to the following restrictions: | |
16 * | |
17 * 1. The origin of this software must not be misrepresented; you must not | |
18 * claim that you wrote the original software. If you use this software | |
19 * in a product, an acknowledgment in the product documentation would be | |
20 * appreciated but is not required. | |
21 * | |
22 * 2. Altered source versions must be plainly marked as such, and must not be | |
23 * misrepresented as being the original software. | |
24 * | |
25 * 3. This notice may not be removed or altered from any source distribution. | |
26 */ | |
27 | |
/*
 * USED is the attribute attached to objects that nothing references:
 * GCC 3.4 and newer get "used", older GCC gets "unused", and any other
 * compiler gets no attribute at all.
 */
#if defined(__GNUC__) && \
	(__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#  define USED __attribute__ ((used))
#elif defined(__GNUC__)
#  define USED __attribute__ ((unused))
#else
#  define USED
#endif

/* Version identification string; not referenced by the code itself. */
static const char sccsid[] USED = "@(#)cut.sl 1.20 (gritter) 5/29/05";
36 | |
37 #include <sys/types.h> | |
38 #include <sys/stat.h> | |
39 #include <fcntl.h> | |
40 #include <unistd.h> | |
41 #include <stdio.h> | |
42 #include <string.h> | |
43 #include <stdlib.h> | |
44 #include <errno.h> | |
45 #include <libgen.h> | |
46 #include <limits.h> | |
47 #include <wchar.h> | |
48 #include <ctype.h> | |
49 #include <locale.h> | |
50 | |
51 #include "iblok.h" | |
52 | |
/*
 * If this is glibc and it provides the _IO_putc_unlocked() macro, make
 * putc() expand to it.  In every other case the putc() supplied by
 * <stdio.h> stays as it is.
 */
#ifdef __GLIBC__
#  ifdef _IO_putc_unlocked
#    undef putc
#    define putc(c, f) _IO_putc_unlocked(c, f)
#  endif
#endif
57 | |
/*
 * One element of the selection list: the closed interval
 * [r_min, r_max] of positions, linked to the element that follows it.
 */
struct range {
	struct range	*r_nxt;	/* next element, NULL for the last one */
	long		r_min;	/* first selected position */
	long		r_max;	/* last selected position */
};
63 | |
/* Command line state, filled in by main() while it parses the options. */
static int		method;		/* list option in effect: 'b', 'c' or 'f' */
static int		nflag;		/* set by -n */
static int		sflag;		/* set by -s */
static wchar_t		wcdelim = '\t';	/* field delimiter, wide form */
static const char	*mbdelim = "\t";	/* field delimiter, byte string form */

/*
 * Head of the selection list built by setlist().
 * NOTE(review): unlike its neighbours this object has external linkage;
 * confirm whether another translation unit relies on that.
 */
struct range		*fields;

/* Process-wide bookkeeping. */
static unsigned		errcnt;		/* ORed result of every processed input */
static char		*progname;	/* basename of argv[0] */
static int		multibyte;	/* nonzero if MB_CUR_MAX > 1 */
73 | |
/*
 * next(wc, s): decode the character at the start of string s into wc.
 *
 * With a multibyte locale the work is done by mbtowc() and its return
 * value is the value of the expression.  Otherwise wc receives the first
 * byte of s masked to eight bits, and the expression is 1 if that byte
 * is nonzero and 0 if it is the terminating NUL.
 */
#define next(wc, s) \
	(multibyte \
	    ? mbtowc(&(wc), (s), MB_LEN_MAX) \
	    : ((wc) = *(s) & 0377, (wc) != 0))
76 | |
/*
 * realloc() wrapper that does not return on failure.
 *
 * vp      block to resize (passed straight to realloc())
 * nbytes  requested new size
 *
 * Returns the pointer obtained from realloc().  If realloc() yields NULL,
 * "line too long" is written to file descriptor 2 and the process exits
 * with status 076.
 */
void *
lrealloc(void *vp, size_t nbytes)
{
	void *grown = realloc(vp, nbytes);

	if (grown != NULL)
		return grown;
	write(2, "line too long\n", 14);
	exit(076);
}
88 | |
/*
 * malloc() wrapper that does not return on failure.
 *
 * nbytes  number of bytes wanted; a request for 0 bytes is served with a
 *         1-byte allocation, because malloc(0) is allowed to return NULL
 *         and that must not be mistaken for memory exhaustion.
 *
 * Returns a non-NULL pointer the caller releases with free().  If the
 * allocation fails, "no memory" is written to file descriptor 2 and the
 * process exits with status 077.
 */
void *
smalloc(size_t nbytes)
{
	static const char msg[] = "no memory\n";
	void *p;

	if ((p = malloc(nbytes ? nbytes : 1)) == NULL) {
		/* sizeof - 1: the terminating NUL is not part of the message */
		write(2, msg, sizeof msg - 1);
		exit(077);
	}
	return p;
}
100 | |
101 static void | |
102 error(const char *s) | |
103 { | |
104 fprintf(stderr, "%s: ERROR: %s\n", progname, s); | |
105 exit(2); | |
106 } | |
107 | |
/*
 * Print the synopsis through error(), which ends the process.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "Usage: cut [-s] [-d<char>] {-c<list> | -f<list>} file ...";

	error(synopsis);
}
113 | |
114 static void | |
115 badlist(void) | |
116 { | |
117 error(method == 'b' ? "bad list for b/c/f option" : | |
118 "bad list for c/f option"); | |
119 } | |
120 | |
121 static void | |
122 setdelim(const char *s) | |
123 { | |
124 int n; | |
125 | |
126 if ((n = next(wcdelim, s)) < 0 || (n > 0 && s[n] != '\0')) | |
127 error("no delimiter"); | |
128 mbdelim = s; | |
129 } | |
130 | |
131 static void | |
132 addrange(long m, long n) | |
133 { | |
134 struct range *rp, *rq; | |
135 | |
136 rp = smalloc(sizeof *rp); | |
137 rp->r_nxt = NULL; | |
138 rp->r_min = m; | |
139 rp->r_max = n ? n : m; | |
140 if (fields) { | |
141 for (rq = fields; rq->r_nxt; rq = rq->r_nxt); | |
142 rq->r_nxt = rp; | |
143 } else | |
144 fields = rp; | |
145 } | |
146 | |
147 static int | |
148 have(long i) | |
149 { | |
150 struct range *rp; | |
151 | |
152 for (rp = fields; rp; rp = rp->r_nxt) | |
153 if (i >= rp->r_min && i <= rp->r_max) | |
154 return 1; | |
155 return 0; | |
156 } | |
157 | |
158 #define mnreset() m = 0, n = 0, lp = &m | |
159 | |
160 static void | |
161 setlist(const char *s) | |
162 { | |
163 char *cbuf, *cp; | |
164 long m, n; | |
165 long *lp; | |
166 | |
167 fields = NULL; | |
168 cbuf = smalloc(strlen(s) + 1); | |
169 mnreset(); | |
170 for (;;) { | |
171 if (*s == '-') { | |
172 if (m == 0) | |
173 m = 1; | |
174 n = LONG_MAX; | |
175 lp = &n; | |
176 s++; | |
177 } else if (*s == ',' || *s == ' ' || *s == '\t' || *s == '\0') { | |
178 if (m) | |
179 addrange(m, n); | |
180 mnreset(); | |
181 if (*s == '\0') | |
182 break; | |
183 s++; | |
184 } else if (isdigit(*s & 0377)) { | |
185 cp = cbuf; | |
186 do | |
187 *cp++ = *s++; | |
188 while (isdigit(*s & 0377)); | |
189 *cp = '\0'; | |
190 *lp = strtol(cbuf, NULL, 10); | |
191 } else | |
192 badlist(); | |
193 } | |
194 if (fields == NULL) | |
195 error("no fields"); | |
196 free(cbuf); | |
197 } | |
198 | |
/*
 * Byte-wise cut: copy to stdout every byte read from ip whose 1-based
 * position within its line is selected by have().  Newlines are always
 * copied and restart the position count at 1.
 *
 * The position is kept as a long, the type have() and the range bounds
 * use, so that it does not overflow an int on very long lines.
 */
static void
cutb(struct iblok *ip)
{
	long i;
	int c;

	i = 1;
	while ((c = ib_get(ip)) != EOF) {
		if (c == '\n') {
			i = 1;
			putc(c, stdout);
		} else if (have(i++))
			putc(c, stdout);
	}
}
213 | |
/*
 * Byte positions without splitting characters (the -b -n combination in
 * a multibyte locale).
 *
 * Input is read one character at a time with ib_getw(), which reports
 * the character's bytes in cp and a length in n (presumably its byte
 * count; confirm against iblok).  A character is written in full when
 * the position i + n - 1, i.e. that of its last byte, is selected.
 * Newlines are always written and restart the position count at 1.
 *
 * The position is kept as a long, the type have() and the range bounds
 * use, so that it does not overflow an int on very long lines.
 */
static void
cutbn(struct iblok *ip)
{
	char *cp;
	long i;
	int m, n;
	wint_t wc;

	i = 1;
	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
		if (wc == '\n') {
			i = 1;
			putc('\n', stdout);
		} else {
			if (have(i + n - 1))
				for (m = 0; m < n; m++)
					putc(cp[m], stdout);
			i += n;
		}
	}
}
234 | |
/*
 * Character-wise cut for multibyte locales: copy to stdout the bytes of
 * every character whose 1-based character position within its line is
 * selected by have().  Newlines are always written and restart the
 * count at 1.  Input for which ib_getw() reports WEOF is neither written
 * nor counted.
 *
 * The position is kept as a long, the type have() and the range bounds
 * use, so that it does not overflow an int on very long lines.
 */
static void
cutc(struct iblok *ip)
{
	char *cp;
	long i;
	int n, m;
	wint_t wc;

	i = 1;
	while ((cp = ib_getw(ip, &wc, &n)) != NULL) {
		if (wc == '\n') {
			i = 1;
			putc('\n', stdout);
		} else if (wc != WEOF && have(i++)) {
			for (m = 0; m < n; m++)
				putc(cp[m], stdout);
		}
	}
}
253 | |
254 static void | |
255 cutf(struct iblok *ip) | |
256 { | |
257 static char *line; | |
258 static size_t linesize; | |
259 char *cp, *lp, *lq; | |
260 int c, i, n, m, gotcha; | |
261 char b; | |
262 wint_t wc; | |
263 const int incr = 128; | |
264 | |
265 if (linesize == 0) | |
266 line = smalloc(linesize = incr); | |
267 lp = line; | |
268 gotcha = 0; | |
269 i = 1; | |
270 do { | |
271 if (multibyte) | |
272 cp = ib_getw(ip, &wc, &n); | |
273 else { | |
274 if ((c = ib_get(ip)) != EOF) { | |
275 wc = c; | |
276 b = (char)c; | |
277 cp = &b; | |
278 } else { | |
279 wc = WEOF; | |
280 cp = NULL; | |
281 } | |
282 n = 1; | |
283 } | |
284 if (cp == NULL || wc == '\n' || wc == wcdelim) { | |
285 if (have(i) && (!sflag || gotcha || wc == wcdelim) || | |
286 (!sflag && i == 1 && | |
287 (cp == NULL || wc == '\n'))) { | |
288 if (gotcha) | |
289 for (m = 0; mbdelim[m]; m++) | |
290 putc(mbdelim[m], stdout); | |
291 for (lq = line; lq < lp; lq++) | |
292 putc(*lq, stdout); | |
293 gotcha = 1; | |
294 } | |
295 if (wc == '\n') { | |
296 if (gotcha) | |
297 putc('\n', stdout); | |
298 i = 1; | |
299 gotcha = 0; | |
300 } else | |
301 i++; | |
302 lp = line; | |
303 } else { | |
304 for (m = 0; m < n; m++) { | |
305 if (lp >= &line[linesize]) { | |
306 size_t diff = lp - line; | |
307 line = lrealloc(line, linesize += incr); | |
308 lp = &line[diff]; | |
309 } | |
310 *lp++ = cp[m]; | |
311 } | |
312 } | |
313 } while (cp != NULL); | |
314 } | |
315 | |
316 static int | |
317 fdcut(int fd) | |
318 { | |
319 struct iblok *ip; | |
320 | |
321 ip = ib_alloc(fd, 0); | |
322 switch (method) { | |
323 case 'b': | |
324 if (nflag && multibyte) | |
325 cutbn(ip); | |
326 else | |
327 cutb(ip); | |
328 break; | |
329 case 'c': | |
330 if (multibyte) | |
331 cutc(ip); | |
332 else | |
333 cutb(ip); | |
334 break; | |
335 case 'f': | |
336 cutf(ip); | |
337 break; | |
338 } | |
339 ib_free(ip); | |
340 return 0; | |
341 } | |
342 | |
343 static int | |
344 fncut(const char *fn) | |
345 { | |
346 int fd, res; | |
347 | |
348 if (fn[0] == '-' && fn[1] == '\0') | |
349 fd = 0; | |
350 else if ((fd = open(fn, O_RDONLY)) < 0) { | |
351 fprintf(stderr, "%s: WARNING: cannot open %s\n", progname, fn); | |
352 return 1; | |
353 } | |
354 res = fdcut(fd); | |
355 if (fd) | |
356 close(fd); | |
357 return res; | |
358 } | |
359 | |
360 int | |
361 main(int argc, char **argv) | |
362 { | |
363 const char optstring[] = "b:c:d:f:ns"; | |
364 int i; | |
365 | |
366 progname = basename(argv[0]); | |
367 setlocale(LC_CTYPE, ""); | |
368 multibyte = MB_CUR_MAX > 1; | |
369 #ifdef __GLIBC__ | |
370 putenv("POSIXLY_CORRECT=1"); | |
371 #endif | |
372 while ((i = getopt(argc, argv, optstring)) != EOF) { | |
373 switch (i) { | |
374 case 'b': | |
375 case 'c': | |
376 case 'f': | |
377 if (method && method != i) | |
378 usage(); | |
379 method = i; | |
380 setlist(optarg); | |
381 break; | |
382 case 'd': | |
383 setdelim(optarg); | |
384 break; | |
385 case 'n': | |
386 nflag = 1; | |
387 break; | |
388 case 's': | |
389 sflag = 1; | |
390 break; | |
391 default: | |
392 usage(); | |
393 } | |
394 } | |
395 /*if ((sflag && method != 'f') || (nflag && method != 'b')) | |
396 usage();*/ | |
397 if (method == 0) | |
398 badlist(); | |
399 if (argv[optind]) { | |
400 for (i = optind; argv[i]; i++) | |
401 errcnt |= fncut(argv[i]); | |
402 } else | |
403 errcnt |= fdcut(0); | |
404 return errcnt; | |
405 } |