logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

shuf.c (4004B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../lib/getopt_nolong.h"
  6. #include <errno.h>
  7. #include <stdbool.h>
  8. #include <stdio.h> // getdelim, fprintf
  9. #include <stdlib.h> // free, malloc, srand, rand, exit, strtoul
  10. #include <string.h> // strerror, memcpy
  11. #include <time.h> // time
  12. #include <unistd.h> // getopt
  13. // Not a full shuffle: if there are more than 512 lines, the last lines are never going to be printed first.
  14. // But this allows bounded memory usage.
  15. // FIXME: handle newline-less lines
  // Program name used as the prefix of every diagnostic printed by this file.
  // NOTE(review): has external linkage; presumably also read by ../lib/getopt_nolong.h — confirm.
  16. const char *argv0 = "shuf";
  // Number of slots in the shuffle pool; also the modulus used to pick a random slot.
  17. #define LINES_LEN 512
  // Shuffle pool: each non-NULL slot holds one malloc'd record waiting to be printed.
  18. static char *lines[LINES_LEN];
  // Record delimiter handed to getdelim(); switched to '\0' by the -z option.
  19. static char delim = '\n';
  // Buffer and capacity owned by getdelim(), reused for every record read.
  20. char *line = NULL;
  21. size_t line_len = 0;
  // Count of records written to stdout so far.
  22. unsigned long wrote = 0;
  // Maximum number of records to write (-n); 0 means no limit.
  23. unsigned long write_limit = 0;
  24. static int
  25. shuf(FILE *in, const char *fname)
  26. {
  27. for(int ln = 0;; ln++)
  28. {
  29. errno = 0;
  30. ssize_t nread = getdelim(&line, &line_len, delim, in);
  31. if(errno != 0)
  32. {
  33. fprintf(stderr,
  34. "%s: error: Failed reading line %d from file \"%s\": %s\n",
  35. argv0,
  36. ln,
  37. fname,
  38. strerror(errno));
  39. return 1;
  40. }
  41. if(nread < 0) return 0;
  42. errno = 0;
  43. char *dup = malloc(nread);
  44. if(!dup)
  45. {
  46. fprintf(
  47. stderr,
  48. "%s: error: Failed to allocate %zd bytes of memory for line %d from file \"%s\": %s\n",
  49. argv0,
  50. nread,
  51. ln,
  52. fname,
  53. strerror(errno));
  54. return 1;
  55. }
  56. memcpy(dup, line, nread);
  57. int p = rand() % LINES_LEN;
  58. if(lines[p] != NULL)
  59. {
  60. fputs(lines[p], stdout);
  61. free(lines[p]);
  62. lines[p] = NULL;
  63. wrote++;
  64. if(write_limit != 0 && write_limit <= wrote) exit(0);
  65. }
  66. lines[p] = dup;
  67. }
  68. }
  // Prints the command synopsis to stderr, covering every option accepted by
  // main(): -e (shuffle the operands themselves), -n (output limit) and
  // -z (NUL delimiter).
  static void
  usage(void)
  {
      fputs("Usage: shuf [-z] [-n num] [files...]\n"
            "       shuf -e [-z] [-n num] [args...]\n",
            stderr);
  }
  74. int
  75. main(int argc, char *argv[])
  76. {
  77. bool e_flag = false;
  78. srand((int)time(NULL));
  79. for(int c = -1; (c = getopt_nolong(argc, argv, ":en:z")) != -1;)
  80. {
  81. char *endptr = NULL;
  82. switch(c)
  83. {
  84. case 'e':
  85. e_flag = true;
  86. break;
  87. case 'n':
  88. write_limit = strtoul(optarg, &endptr, 0);
  89. if(errno != 0)
  90. {
  91. fprintf(stderr,
  92. "%s: error: Failed parsing number for `-n %s`: %s\n",
  93. argv0,
  94. optarg,
  95. strerror(errno));
  96. return 1;
  97. }
  98. if(endptr != NULL && *endptr != 0)
  99. {
  100. fprintf(stderr,
  101. "%s: error: Found extraneous characters while parsing `-n %s` as a number: %s\n",
  102. argv0,
  103. optarg,
  104. endptr);
  105. return 1;
  106. }
  107. break;
  108. case 'z':
  109. delim = '\0';
  110. break;
  111. case ':':
  112. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  113. usage();
  114. return 1;
  115. case '?':
  116. if(!got_long_opt) fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
  117. usage();
  118. return 1;
  119. }
  120. }
  121. argc -= optind;
  122. argv += optind;
  123. if(e_flag)
  124. {
  125. // Fisher-Yates shuffles
  126. for(int i = 0; i <= argc - 2; i++)
  127. {
  128. int p = rand() % argc;
  129. // swap
  130. char *tmp = argv[p];
  131. argv[p] = argv[argc - 1];
  132. argv[argc - 1] = tmp;
  133. }
  134. unsigned long limit = argc;
  135. if(write_limit != 0 && write_limit < limit) limit = write_limit;
  136. for(unsigned long i = 0; i < limit; i++)
  137. {
  138. printf("%s%c", argv[i], delim);
  139. }
  140. return 0;
  141. }
  142. for(int i = 0; i < LINES_LEN; i++)
  143. lines[i] = NULL;
  144. if(argc <= 0)
  145. {
  146. if(shuf(stdin, "<stdin>") != 0) return 1;
  147. }
  148. else
  149. {
  150. for(int i = 0; i < argc; i++)
  151. {
  152. if(strncmp(argv[i], "-", 2) == 0)
  153. {
  154. if(shuf(stdin, "<stdin>") != 0) return 1;
  155. continue;
  156. }
  157. FILE *in = fopen(argv[i], "r");
  158. if(shuf(in, argv[i]) != 0)
  159. {
  160. fclose(in);
  161. return 1;
  162. }
  163. fclose(in);
  164. }
  165. }
  166. // inserts are random so iterating on it is fine
  167. for(int i = 0; i < LINES_LEN; i++)
  168. {
  169. if(write_limit != 0 && write_limit <= wrote) break;
  170. if(lines[i] != NULL)
  171. {
  172. fputs(lines[i], stdout);
  173. free(lines[i]);
  174. wrote++;
  175. }
  176. }
  177. return 0;
  178. }