cut.c (8894B)
- // utils-std: Collection of commonly available Unix tools
- // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
- // SPDX-License-Identifier: MPL-2.0
- #define _POSIX_C_SOURCE 202405L
#include "../lib/reallocarray.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <stdbool.h>
#include <stdint.h> // size_t
#include <stdio.h>  // fprintf, fopen
#include <stdlib.h> // strtoul, free, abort
#include <string.h> // strerror
#include <unistd.h> // getopt
#include <wchar.h>
// Smaller of two values.
// Function-like macro: an argument can be evaluated twice, so callers must not
// pass expressions with side effects.
#undef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
// Which kind of unit the list selects, chosen by the -b / -c / -f options.
enum cut_mode
{
	CUT_MODE_NONE = 0, // no -b, -c or -f given yet
	CUT_MODE_B,        // -b
	CUT_MODE_C,        // -c
	CUT_MODE_F,        // -f
};
- char delim = '\t';
- bool opt_n = false, opt_s = false;
- enum cut_mode mode = CUT_MODE_NONE;
- bool *list = NULL;
- size_t list_len = 0;
- bool nostop = false;
- const char *argv0 = "cut";
- static size_t
- parse_list_num(char **s)
- {
- char *endptr = NULL;
- errno = 0;
- size_t n = strtoul(*s, &endptr, 10);
- if(errno != 0)
- {
- fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
- return 0;
- }
- if(n < 1)
- {
- fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
- return 0;
- }
- if(endptr != NULL && strchr(",-", *endptr) == NULL)
- {
- fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
- return 0;
- }
- *s = endptr;
- return n;
- }
- static int
- parse_list(char *s)
- {
- while(true)
- {
- if(s == NULL || *s == '\0') break;
- if(*s == ',')
- {
- fprintf(stderr, "%s: error: Empty list element\n", argv0);
- return -1;
- }
- size_t min = 1;
- if(*s != '-')
- {
- min = parse_list_num(&s);
- if(min == 0) return -1;
- }
- // min-- as cut(1) is 1-indexed and max needs to be at least min+1
- size_t max = min--;
- if(s && *s == '-')
- {
- s++;
- if(!isdigit(*s))
- {
- nostop = true;
- }
- else
- {
- max = parse_list_num(&s);
- if(max == 0) return -1;
- if(max < min)
- {
- fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
- return -1;
- }
- }
- }
- // Needs to be after *s == '-'
- if(s && *s == ',') s++;
- if(max > list_len)
- {
- list = reallocarray(list, max, sizeof(*list));
- if(list == NULL)
- {
- fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
- return -1;
- }
- if(min > list_len)
- {
- memset(list + list_len, 0, min - list_len);
- }
- list_len = max;
- }
- memset(list + min, 1, max - min);
- }
- if(list_len == 0)
- {
- fprintf(stderr, "%s: error: Empty list\n", argv0);
- return -1;
- }
- return 0;
- }
- static int
- cut_b(FILE *in, const char *filename)
- {
- char *line = NULL;
- size_t line_len = 0;
- int err = 0;
- while(err == 0)
- {
- errno = 0;
- ssize_t nread = getline(&line, &line_len, in);
- if(nread < 0)
- {
- if(errno != 0)
- {
- fprintf(
- stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
- err = 1;
- }
- break;
- }
- if(nread == 0)
- {
- fputc('\n', stdout);
- continue;
- }
- if(line[nread - 1] == '\n') line[nread--] = '\0';
- for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
- if(list[i]) fputc(line[i], stdout);
- if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
- fputc('\n', stdout);
- }
- if(line_len != 0) free(line);
- return err;
- }
- static int
- cut_c(FILE *in, const char *filename)
- {
- char *line = NULL;
- size_t line_len = 0;
- int err = 0;
- wchar_t *line_w = NULL;
- ssize_t line_wsz = 0;
- while(err == 0)
- {
- errno = 0;
- ssize_t nread = getline(&line, &line_len, in);
- if(nread < 0)
- {
- if(errno != 0)
- {
- fprintf(
- stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
- err = 1;
- }
- break;
- }
- if(nread == 0)
- {
- fputc('\n', stdout);
- continue;
- }
- if(line[nread - 1] == '\n') line[nread--] = '\0';
- if(nread > line_wsz)
- {
- line_w = reallocarray(line_w, nread, sizeof(*line_w));
- if(line_w == NULL)
- {
- fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
- return -1;
- }
- line_wsz = nread;
- }
- assert(line_wsz > 0);
- size_t wcread = mbstowcs(line_w, line, line_wsz);
- if(wcread == (size_t)-1)
- {
- fprintf(stderr,
- "%s: error: Failed parsing characters in file '%s': %s\n",
- argv0,
- filename,
- strerror(errno));
- err = 1;
- break;
- }
- //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
- size_t i = 0;
- for(; i < MIN(list_len, wcread); i++)
- if(list[i]) fputwc(line_w[i], stdout);
- if(nostop && wcread > list_len)
- {
- for(; i < wcread; i++)
- fputwc(line_w[i], stdout);
- }
- fputc('\n', stdout);
- }
- if(line_len != 0) free(line);
- return err;
- }
- static int
- cut_f(FILE *in, const char *filename)
- {
- char *line = NULL;
- size_t line_len = 0;
- int err = 0;
- while(err == 0)
- {
- errno = 0;
- ssize_t nread = getline(&line, &line_len, in);
- if(nread < 0)
- {
- if(errno != 0)
- {
- fprintf(
- stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
- err = 1;
- }
- break;
- }
- if(nread == 0)
- {
- fputc('\n', stdout);
- continue;
- }
- if(line[nread - 1] == '\n') line[--nread] = '\0';
- size_t di = 0;
- for(; di < (size_t)nread; di++)
- if(line[di] == delim) break;
- if(di == (size_t)nread)
- {
- if(!opt_s) puts(line);
- continue;
- }
- bool need_sep = false;
- char *c = line;
- for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
- {
- if(pos < (size_t)nread && line[pos] != delim) continue;
- line[pos] = '\0';
- if(i >= list_len)
- {
- if(!nostop) break;
- if(need_sep) fputc(delim, stdout);
- fputs(c, stdout);
- need_sep = true;
- }
- else if(list[i])
- {
- if(need_sep) fputc(delim, stdout);
- fputs(c, stdout);
- need_sep = true;
- }
- i++;
- c = line + pos + 1;
- }
- fputc('\n', stdout);
- }
- if(line_len != 0) free(line);
- return err;
- }
- static int
- cut(FILE *in, const char *filename)
- {
- switch(mode)
- {
- case CUT_MODE_NONE:
- fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
- return 1;
- case CUT_MODE_B:
- return cut_b(in, filename);
- case CUT_MODE_C:
- return cut_c(in, filename);
- case CUT_MODE_F:
- return cut_f(in, filename);
- default:
- abort();
- }
- }
- int
- main(int argc, char *argv[])
- {
- char *opt_list = NULL;
- errno = 0;
- setlocale(LC_ALL, "");
- if(errno != 0)
- {
- fprintf(stderr, "%s: warning: Failed to initialize locales: %s\n", argv0, strerror(errno));
- errno = 0;
- }
- int c = -1;
- while((c = getopt(argc, argv, ":b:c:d:f:ns")) != -1)
- {
- switch(c)
- {
- case 'b':
- if(opt_list != NULL)
- {
- fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
- return 1;
- }
- mode = CUT_MODE_B;
- opt_list = optarg;
- break;
- case 'c':
- if(opt_list != NULL)
- {
- fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
- return 1;
- }
- mode = CUT_MODE_C;
- opt_list = optarg;
- break;
- case 'f':
- if(opt_list != NULL)
- {
- fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
- return 1;
- }
- mode = CUT_MODE_F;
- opt_list = optarg;
- break;
- case 'd':
- if(optarg[0] != '\0' && optarg[1] != '\0')
- {
- fprintf(stderr,
- "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
- argv0,
- optarg);
- return 1;
- }
- delim = optarg[0];
- break;
- case 'n':
- opt_n = true;
- break;
- case 's':
- opt_s = true;
- break;
- case ':':
- fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
- return 1;
- case '?':
- fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
- return 1;
- default:
- fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
- abort();
- }
- }
- argc -= optind;
- argv += optind;
- if(mode == CUT_MODE_NONE)
- {
- fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
- return 1;
- }
- if(parse_list(opt_list) < 0) return 1;
- #if 0
- fprintf(stderr, "[DEBUG] list: ");
- for(size_t i = 0; i < list_len; i++)
- {
- fputc(list[i] ? '1' : '0', stderr);
- }
- fputc('\n', stderr);
- #endif
- if(argc <= 0) return cut(stdin, "<stdin>");
- for(int i = 0; i < argc; i++)
- {
- FILE *in = fopen(argv[i], "r");
- if(in == NULL)
- {
- fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
- return 1;
- }
- int ret = cut(in, argv[i]);
- if(fclose(in) < 0)
- {
- fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
- return 1;
- }
- if(ret != 0) return 1;
- }
- if(fclose(stdin) != 0)
- {
- fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
- return 1;
- }
- if(fclose(stdout) != 0)
- {
- fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
- return 1;
- }
- return 0;
- }