logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

shuf.c (4564B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h"
  6. #include "../libutils/getopt_nolong.h"
  7. #include <errno.h>
  8. #include <stdbool.h>
  9. #include <stdio.h> // getdelim, fprintf
  10. #include <stdlib.h> // free, malloc, srand, rand, exit, strtoul
  11. #include <string.h> // strerror, memcpy
  12. #include <time.h> // time
  13. #include <unistd.h> // getopt
  14. #ifdef HAS_GETOPT_LONG
  15. #include <getopt.h>
  16. #endif
  // Not a full shuffle: input records pass through a fixed pool of LINES_LEN
  // slots, so when there are more than LINES_LEN lines the last ones can never
  // be printed first. In exchange, memory usage stays bounded.
  // FIXME: handle newline-less lines

  // Program name used as the prefix of diagnostics. Deliberately left with
  // external linkage; presumably the libutils helpers reference it -- TODO confirm.
  const char *argv0 = "shuf";

  // Number of slots in the shuffle pool.
  #define LINES_LEN 512

  // Shuffle pool: each slot is either NULL or a heap-allocated record.
  static char *lines[LINES_LEN];

  // Record delimiter: '\n' by default, '\0' once -z was given.
  static char delim = '\n';

  // Buffer and capacity handed to getdelim(); reused for every read.
  static char *line = NULL;
  static size_t line_len = 0;

  // Number of records written so far when shuffling files/stdin.
  static unsigned long wrote = 0;

  // Maximum number of records to write (-n); 0 means "no limit".
  static unsigned long write_limit = 0;
  28. static int
  29. shuf(FILE *in, const char *fname)
  30. {
  31. for(int ln = 0;; ln++)
  32. {
  33. errno = 0;
  34. ssize_t nread = getdelim(&line, &line_len, delim, in);
  35. if(errno != 0)
  36. {
  37. fprintf(stderr,
  38. "%s: error: Failed reading line %d from file \"%s\": %s\n",
  39. argv0,
  40. ln,
  41. fname,
  42. strerror(errno));
  43. return 1;
  44. }
  45. if(nread <= 0) return 0;
  46. errno = 0;
  47. char *dup = malloc(nread + 1);
  48. if(!dup)
  49. {
  50. fprintf(
  51. stderr,
  52. "%s: error: Failed to allocate %zd bytes of memory for line %d from file \"%s\": %s\n",
  53. argv0,
  54. nread,
  55. ln,
  56. fname,
  57. strerror(errno));
  58. return 1;
  59. }
  60. memcpy(dup, line, nread);
  61. dup[nread] = '\0';
  62. int p = rand() % LINES_LEN;
  63. if(lines[p] != NULL)
  64. {
  65. fputs(lines[p], stdout);
  66. free(lines[p]);
  67. lines[p] = NULL;
  68. wrote++;
  69. if(write_limit != 0 && write_limit <= wrote) exit(0);
  70. }
  71. lines[p] = dup;
  72. }
  73. }
  // Prints the synopsis of both invocation forms (files and -e operands) to
  // stderr.
  static void
  usage(void)
  {
    static const char synopsis[] = "Usage: shuf [-z] [-n num] [files...]\n"
                                   "shuf -e [-z] [-n num] [string...]\n";

    fputs(synopsis, stderr);
  }
  83. int
  84. main(int argc, char *argv[])
  85. {
  86. bool e_flag = false;
  87. srand((int)time(NULL));
  88. #ifdef HAS_GETOPT_LONG
  89. // Strictly for GNUisms compatibility so no long-only options
  90. // clang-format off
  91. static struct option opts[] = {
  92. {"echo", no_argument, NULL, 'e'},
  93. {"head-count", required_argument, NULL, 'n'},
  94. {"zero-terminated", no_argument, NULL, 'z'},
  95. {0, 0, 0, 0},
  96. };
  97. // clang-format on
  98. // Need + as first character to get POSIX-style option parsing
  99. for(int c = -1; (c = getopt_long(argc, argv, "+:en:z", opts, NULL)) != -1;)
  100. #else
  101. for(int c = -1; (c = getopt_nolong(argc, argv, ":en:z")) != -1;)
  102. #endif
  103. {
  104. char *endptr = NULL;
  105. switch(c)
  106. {
  107. case 'e':
  108. e_flag = true;
  109. break;
  110. case 'n':
  111. write_limit = strtoul(optarg, &endptr, 0);
  112. if(errno != 0)
  113. {
  114. fprintf(stderr,
  115. "%s: error: Failed parsing number for `-n %s`: %s\n",
  116. argv0,
  117. optarg,
  118. strerror(errno));
  119. return 1;
  120. }
  121. if(endptr != NULL && *endptr != 0)
  122. {
  123. fprintf(stderr,
  124. "%s: error: Found extraneous characters while parsing `-n %s` as a number: %s\n",
  125. argv0,
  126. optarg,
  127. endptr);
  128. return 1;
  129. }
  130. break;
  131. case 'z':
  132. delim = '\0';
  133. break;
  134. case ':':
  135. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  136. usage();
  137. return 1;
  138. case '?':
  139. GETOPT_UNKNOWN_OPT
  140. usage();
  141. return 1;
  142. }
  143. }
  144. argc -= optind;
  145. argv += optind;
  146. if(e_flag)
  147. {
  148. // Fisher-Yates shuffles
  149. for(int i = 0; i <= argc - 2; i++)
  150. {
  151. int p = rand() % argc;
  152. // swap
  153. char *tmp = argv[p];
  154. argv[p] = argv[argc - 1];
  155. argv[argc - 1] = tmp;
  156. }
  157. unsigned long limit = argc;
  158. if(write_limit != 0 && write_limit < limit) limit = write_limit;
  159. for(unsigned long i = 0; i < limit; i++)
  160. {
  161. printf("%s%c", argv[i], delim);
  162. }
  163. return 0;
  164. }
  165. for(int i = 0; i < LINES_LEN; i++)
  166. lines[i] = NULL;
  167. if(argc <= 0)
  168. {
  169. if(shuf(stdin, "<stdin>") != 0) return 1;
  170. }
  171. else
  172. {
  173. for(int i = 0; i < argc; i++)
  174. {
  175. if(strncmp(argv[i], "-", 2) == 0)
  176. {
  177. if(shuf(stdin, "<stdin>") != 0) return 1;
  178. continue;
  179. }
  180. FILE *in = fopen(argv[i], "r");
  181. if(shuf(in, argv[i]) != 0)
  182. {
  183. fclose(in);
  184. return 1;
  185. }
  186. fclose(in);
  187. }
  188. }
  189. // inserts are random so iterating on it is fine
  190. for(int i = 0; i < LINES_LEN; i++)
  191. {
  192. if(write_limit != 0 && write_limit <= wrote) break;
  193. if(lines[i] != NULL)
  194. {
  195. fputs(lines[i], stdout);
  196. free(lines[i]);
  197. wrote++;
  198. }
  199. }
  200. return 0;
  201. }