logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

shuf.c (4636B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h"
  6. #include "../libutils/getopt_nolong.h"
  7. #include <errno.h>
  8. #include <stdbool.h>
  9. #include <stdio.h> // getdelim, fprintf
  10. #include <stdlib.h> // free, malloc, srand, rand, exit, strtoul
  11. #include <string.h> // strerror, memcpy
  12. #include <time.h> // time
  13. #include <unistd.h> // getopt
  14. #ifdef HAS_GETOPT_LONG
  15. #include <getopt.h>
  16. #endif
  // Not a full shuffle: when the input has more than 512 lines, the later lines can never be printed first.
  // In exchange, memory usage stays bounded.
  19. // /!\ Make sure to modify the manpage as well if this gets changed /!\
  20. // FIXME: handle newline-less lines
  // Program name, used as the prefix of every diagnostic.
  const char *argv0 = "shuf";
  // Number of slots in the shuffle buffer.
  #define LINES_LEN 512
  // Shuffle buffer: each non-NULL slot owns one heap-allocated record waiting to be printed.
  static char *lines[LINES_LEN];
  // Record delimiter: newline by default, switched to NUL by -z.
  static char delim = '\n';
  // Buffer and capacity handed to getdelim(), reused from one read to the next.
  char *line = NULL;
  size_t line_len = 0;
  // Count of records printed from the shuffle buffer so far.
  unsigned long wrote = 0;
  // Upper bound on printed records, set by -n; 0 means no limit.
  unsigned long write_limit = 0;
  29. static int
  30. shuf(FILE *in, const char *fname)
  31. {
  32. for(int ln = 0;; ln++)
  33. {
  34. errno = 0;
  35. ssize_t nread = getdelim(&line, &line_len, delim, in);
  36. if(errno != 0)
  37. {
  38. fprintf(stderr,
  39. "%s: error: Failed reading line %d from file \"%s\": %s\n",
  40. argv0,
  41. ln,
  42. fname,
  43. strerror(errno));
  44. return 1;
  45. }
  46. if(nread <= 0) return 0;
  47. errno = 0;
  48. char *dup = malloc(nread + 1);
  49. if(!dup)
  50. {
  51. fprintf(
  52. stderr,
  53. "%s: error: Failed to allocate %zd bytes of memory for line %d from file \"%s\": %s\n",
  54. argv0,
  55. nread,
  56. ln,
  57. fname,
  58. strerror(errno));
  59. return 1;
  60. }
  61. memcpy(dup, line, nread);
  62. dup[nread] = '\0';
  63. int p = rand() % LINES_LEN;
  64. if(lines[p] != NULL)
  65. {
  66. fputs(lines[p], stdout);
  67. free(lines[p]);
  68. lines[p] = NULL;
  69. wrote++;
  70. if(write_limit != 0 && write_limit <= wrote) exit(0);
  71. }
  72. lines[p] = dup;
  73. }
  74. }
  /* Print the two supported invocation forms to stderr. */
  static void
  usage(void)
  {
      static const char synopsis[] =
          "Usage: shuf [-z] [-n num] [files...]\n"
          "shuf -e [-z] [-n num] [string...]\n";

      fputs(synopsis, stderr);
  }
  84. int
  85. main(int argc, char *argv[])
  86. {
  87. bool e_flag = false;
  88. srand((int)time(NULL));
  89. #ifdef HAS_GETOPT_LONG
  90. // Strictly for GNUisms compatibility so no long-only options
  91. // clang-format off
  92. static struct option opts[] = {
  93. {"echo", no_argument, NULL, 'e'},
  94. {"head-count", required_argument, NULL, 'n'},
  95. {"zero-terminated", no_argument, NULL, 'z'},
  96. {0, 0, 0, 0},
  97. };
  98. // clang-format on
  99. // Need + as first character to get POSIX-style option parsing
  100. for(int c = -1; (c = getopt_long(argc, argv, "+:en:z", opts, NULL)) != -1;)
  101. #else
  102. for(int c = -1; (c = getopt_nolong(argc, argv, ":en:z")) != -1;)
  103. #endif
  104. {
  105. char *endptr = NULL;
  106. switch(c)
  107. {
  108. case 'e':
  109. e_flag = true;
  110. break;
  111. case 'n':
  112. write_limit = strtoul(optarg, &endptr, 0);
  113. if(errno != 0)
  114. {
  115. fprintf(stderr,
  116. "%s: error: Failed parsing number for `-n %s`: %s\n",
  117. argv0,
  118. optarg,
  119. strerror(errno));
  120. return 1;
  121. }
  122. if(endptr != NULL && *endptr != 0)
  123. {
  124. fprintf(stderr,
  125. "%s: error: Found extraneous characters while parsing `-n %s` as a number: %s\n",
  126. argv0,
  127. optarg,
  128. endptr);
  129. return 1;
  130. }
  131. break;
  132. case 'z':
  133. delim = '\0';
  134. break;
  135. case ':':
  136. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  137. usage();
  138. return 1;
  139. case '?':
  140. GETOPT_UNKNOWN_OPT
  141. usage();
  142. return 1;
  143. }
  144. }
  145. argc -= optind;
  146. argv += optind;
  147. if(e_flag)
  148. {
  149. // Fisher-Yates shuffles
  150. for(int i = 0; i <= argc - 2; i++)
  151. {
  152. int p = rand() % argc;
  153. // swap
  154. char *tmp = argv[p];
  155. argv[p] = argv[argc - 1];
  156. argv[argc - 1] = tmp;
  157. }
  158. unsigned long limit = argc;
  159. if(write_limit != 0 && write_limit < limit) limit = write_limit;
  160. for(unsigned long i = 0; i < limit; i++)
  161. {
  162. printf("%s%c", argv[i], delim);
  163. }
  164. return 0;
  165. }
  166. for(int i = 0; i < LINES_LEN; i++)
  167. lines[i] = NULL;
  168. if(argc <= 0)
  169. {
  170. if(shuf(stdin, "<stdin>") != 0) return 1;
  171. }
  172. else
  173. {
  174. for(int i = 0; i < argc; i++)
  175. {
  176. if(strncmp(argv[i], "-", 2) == 0)
  177. {
  178. if(shuf(stdin, "<stdin>") != 0) return 1;
  179. continue;
  180. }
  181. FILE *in = fopen(argv[i], "r");
  182. if(shuf(in, argv[i]) != 0)
  183. {
  184. fclose(in);
  185. return 1;
  186. }
  187. fclose(in);
  188. }
  189. }
  190. // inserts are random so iterating on it is fine
  191. for(int i = 0; i < LINES_LEN; i++)
  192. {
  193. if(write_limit != 0 && write_limit <= wrote) break;
  194. if(lines[i] != NULL)
  195. {
  196. fputs(lines[i], stdout);
  197. free(lines[i]);
  198. wrote++;
  199. }
  200. }
  201. return 0;
  202. }