logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

shuf.c (3945B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include <errno.h>
  6. #include <stdbool.h>
  7. #include <stdio.h> // getdelim, fprintf
  8. #include <stdlib.h> // free, malloc, srand, rand, exit, strtoul
  9. #include <string.h> // strerror, memcpy
  10. #include <time.h> // time
  11. #include <unistd.h> // getopt
// Not a full shuffle: if there are more than 512 lines, then the last lines are never going to be printed first.
// But this allows bounded memory usage.
// FIXME: handle newline-less lines
// Program name, used as the prefix of every diagnostic message.
const char *argv0 = "shuf";
// Number of slots in the shuffle buffer.
#define LINES_LEN 512
// Shuffle buffer: each non-NULL slot holds a malloc'ed copy of one input record waiting to be printed.
static char *lines[LINES_LEN];
// Record delimiter: '\n' by default, switched to '\0' by the -z option.
static char delim = '\n';
// Buffer and capacity handed to getdelim(); reused across reads.
char *line = NULL;
size_t line_len = 0;
// Number of records printed so far from the shuffle buffer.
unsigned long wrote = 0;
// Maximum number of records to print (set by -n); 0 means no limit.
unsigned long write_limit = 0;
  23. static int
  24. shuf(FILE *in, const char *fname)
  25. {
  26. for(int ln = 0;; ln++)
  27. {
  28. errno = 0;
  29. ssize_t nread = getdelim(&line, &line_len, delim, in);
  30. if(errno != 0)
  31. {
  32. fprintf(stderr,
  33. "%s: error: Failed reading line %d from file \"%s\": %s\n",
  34. argv0,
  35. ln,
  36. fname,
  37. strerror(errno));
  38. return 1;
  39. }
  40. if(nread < 0) return 0;
  41. errno = 0;
  42. char *dup = malloc(nread);
  43. if(!dup)
  44. {
  45. fprintf(
  46. stderr,
  47. "%s: error: Failed to allocate %zd bytes of memory for line %d from file \"%s\": %s\n",
  48. argv0,
  49. nread,
  50. ln,
  51. fname,
  52. strerror(errno));
  53. return 1;
  54. }
  55. memcpy(dup, line, nread);
  56. int p = rand() % LINES_LEN;
  57. if(lines[p] != NULL)
  58. {
  59. fputs(lines[p], stdout);
  60. free(lines[p]);
  61. lines[p] = NULL;
  62. wrote++;
  63. if(write_limit != 0 && write_limit <= wrote) exit(0);
  64. }
  65. lines[p] = dup;
  66. }
  67. }
  68. static void
  69. usage(void)
  70. {
  71. fputs("Usage: shuf [-z] [files...]\n", stderr);
  72. }
  73. int
  74. main(int argc, char *argv[])
  75. {
  76. bool e_flag = false;
  77. srand((int)time(NULL));
  78. for(int c = -1; (c = getopt(argc, argv, ":en:z")) != -1;)
  79. {
  80. char *endptr = NULL;
  81. switch(c)
  82. {
  83. case 'e':
  84. e_flag = true;
  85. break;
  86. case 'n':
  87. write_limit = strtoul(optarg, &endptr, 0);
  88. if(errno != 0)
  89. {
  90. fprintf(stderr,
  91. "%s: error: Failed parsing number for `-n %s`: %s\n",
  92. argv0,
  93. optarg,
  94. strerror(errno));
  95. return 1;
  96. }
  97. if(endptr != NULL && *endptr != 0)
  98. {
  99. fprintf(stderr,
  100. "%s: error: Found extraneous characters while parsing `-n %s` as a number: %s\n",
  101. argv0,
  102. optarg,
  103. endptr);
  104. return 1;
  105. }
  106. break;
  107. case 'z':
  108. delim = '\0';
  109. break;
  110. case ':':
  111. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  112. usage();
  113. return 1;
  114. case '?':
  115. fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
  116. usage();
  117. return 1;
  118. }
  119. }
  120. argc -= optind;
  121. argv += optind;
  122. if(e_flag)
  123. {
  124. // Fisher-Yates shuffles
  125. for(int i = 0; i <= argc - 2; i++)
  126. {
  127. int p = rand() % argc;
  128. // swap
  129. char *tmp = argv[p];
  130. argv[p] = argv[argc - 1];
  131. argv[argc - 1] = tmp;
  132. }
  133. unsigned long limit = argc;
  134. if(write_limit != 0 && write_limit < limit) limit = write_limit;
  135. for(unsigned long i = 0; i < limit; i++)
  136. {
  137. printf("%s%c", argv[i], delim);
  138. }
  139. return 0;
  140. }
  141. for(int i = 0; i < LINES_LEN; i++)
  142. lines[i] = NULL;
  143. if(argc <= 0)
  144. {
  145. if(shuf(stdin, "<stdin>") != 0) return 1;
  146. }
  147. else
  148. {
  149. for(int i = 0; i < argc; i++)
  150. {
  151. if(strncmp(argv[i], "-", 2) == 0)
  152. {
  153. if(shuf(stdin, "<stdin>") != 0) return 1;
  154. continue;
  155. }
  156. FILE *in = fopen(argv[i], "r");
  157. if(shuf(in, argv[i]) != 0)
  158. {
  159. fclose(in);
  160. return 1;
  161. }
  162. fclose(in);
  163. }
  164. }
  165. // inserts are random so iterating on it is fine
  166. for(int i = 0; i < LINES_LEN; i++)
  167. {
  168. if(write_limit != 0 && write_limit <= wrote) break;
  169. if(lines[i] != NULL)
  170. {
  171. fputs(lines[i], stdout);
  172. free(lines[i]);
  173. wrote++;
  174. }
  175. }
  176. return 0;
  177. }