logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

split.c (7643B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h"
  6. #include "../libutils/fs.h" // auto_file_copy
  7. #include "../libutils/getopt_nolong.h"
  8. #include "../libutils/truncation.h" // apply_size_suffix
  9. #include <errno.h>
  10. #include <fcntl.h> // open
  11. #include <limits.h> // NAME_MAX
  12. #include <stdio.h> // fprintf
  13. #include <stdlib.h> // strtoul
  14. #include <string.h> // strerror
  15. #include <sys/stat.h> // fstat
  16. #include <unistd.h> // getopt
  17. #ifdef HAS_GETOPT_LONG
  18. #include <getopt.h>
  19. #endif
  // Program name used as the prefix of every diagnostic message.
  const char *argv0 = "split";
  // Prefix of the output file names; replaced by the second operand when given.
  const char *name = "x";
  // Length of `name` in bytes, excluding the terminating NUL.
  size_t name_len = 1;
  // suffix_len: number of letters appended to `name` (option -a).
  // bytes / lines: size of each piece for -b / -l; 0 means "not selected".
  size_t suffix_len = 2, bytes = 0, lines = 0;
  // Path of the input file, or NULL to read from standard input.
  char *name_in = NULL;

  // Builds the output file name for piece number `id` into `str`:
  // `name` followed by exactly `suffix_len` lowercase letters holding `id`
  // in base 26 ('a' = 0), most significant letter first and padded with 'a'
  // (for a suffix length of 2: 0 -> "aa", 1 -> "ab", 26 -> "ba").
  //
  // `str` must have room for name_len + suffix_len + 1 bytes; the result is
  // always NUL-terminated.
  //
  // Returns 0 on success, or -1 after printing a diagnostic when `id` is
  // negative or needs more than `suffix_len` letters.
  static int
  base26(int id, char *str)
  {
    const size_t total_len = name_len + suffix_len;

    memcpy(str, name, name_len);
    memset(str + name_len, 'a', suffix_len);
    str[total_len] = '\0';

    // Write the letters right to left, starting at the LAST suffix position
    // (total_len - 1) and never stepping back into the prefix.
    int rest = id;
    for(size_t pos = total_len; rest > 0 && pos > name_len;)
    {
      str[--pos] = (char)('a' + (rest % 26));
      rest /= 26;
    }

    // Leftover digits mean the suffix is too short for this id.
    if(id < 0 || rest > 0)
    {
      fprintf(stderr,
              "%s: error: Failed representing %d into suffix of length %zu\n",
              argv0,
              id,
              suffix_len);
      return -1;
    }

    return 0;
  }
  47. static int
  48. split_bytes(void)
  49. {
  50. int fd_in = STDIN_FILENO;
  51. if(name_in != NULL)
  52. {
  53. fd_in = open(name_in, O_RDONLY | O_NOCTTY);
  54. if(fd_in < 0)
  55. {
  56. fprintf(stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_in, strerror(errno));
  57. return 1;
  58. }
  59. }
  60. struct stat fd_in_stat;
  61. if(fstat(fd_in, &fd_in_stat) != 0)
  62. {
  63. fprintf(stderr,
  64. "%s: error: Failed getting status from file '%s': %s",
  65. argv0,
  66. name_in,
  67. strerror(errno));
  68. close(fd_in);
  69. return 1;
  70. }
  71. posix_fadvise(fd_in, 0, 0, POSIX_FADV_SEQUENTIAL);
  72. errno = 0;
  73. int err = 0;
  74. off_t wrote = 0;
  75. int split_id = 0;
  76. while(wrote < fd_in_stat.st_size)
  77. {
  78. char name_out[NAME_MAX] = "";
  79. if(base26(split_id++, name_out) < 0) return 1;
  80. int fd_out = open(name_out, O_WRONLY | O_NOCTTY | O_CREAT, 0644);
  81. if(fd_out < 0)
  82. {
  83. fprintf(
  84. stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_out, strerror(errno));
  85. err = 1;
  86. break;
  87. }
  88. posix_fadvise(fd_out, 0, 0, POSIX_FADV_SEQUENTIAL);
  89. errno = 0;
  90. int ret = auto_file_copy(fd_in, fd_out, bytes, 0);
  91. if(ret < 0)
  92. {
  93. fprintf(stderr,
  94. "%s: error: Failed copying from file '%s' to file '%s': %s\n",
  95. argv0,
  96. name_in ? name_in : "<stdin>",
  97. name_out,
  98. strerror(errno));
  99. close(fd_out);
  100. err = 1;
  101. break;
  102. }
  103. wrote += ret;
  104. if(close(fd_out) < 0)
  105. {
  106. fprintf(
  107. stderr, "%s: error: Failed closing file '%s': %s\n", argv0, name_out, strerror(errno));
  108. err = 1;
  109. break;
  110. }
  111. }
  112. if(name_in != NULL) close(fd_in);
  113. return err;
  114. }
  115. static int
  116. split_lines(void)
  117. {
  118. FILE *in = stdin;
  119. if(name_in != NULL)
  120. {
  121. in = fopen(name_in, "r");
  122. if(in == NULL)
  123. {
  124. fprintf(stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_in, strerror(errno));
  125. return 1;
  126. }
  127. }
  128. int err = 0;
  129. char *line = NULL;
  130. size_t line_len = 0;
  131. int split_id = 0;
  132. while(true)
  133. {
  134. if(feof(in)) break;
  135. if(ferror(in))
  136. {
  137. fprintf(stderr,
  138. "%s: error: Failed reading line from file '%s': %s\n",
  139. argv0,
  140. name_in,
  141. strerror(errno));
  142. err = 1;
  143. break;
  144. }
  145. char name_out[NAME_MAX] = "";
  146. if(base26(split_id++, name_out) < 0)
  147. {
  148. err = 1;
  149. break;
  150. }
  151. FILE *out = NULL;
  152. for(size_t i = 0; i < lines; i++)
  153. {
  154. ssize_t nread = getline(&line, &line_len, in);
  155. if(nread < 0)
  156. {
  157. if(errno != 0)
  158. {
  159. fprintf(stderr,
  160. "%s: error: Failed reading line from file '%s': %s\n",
  161. argv0,
  162. name_in,
  163. strerror(errno));
  164. err = 1;
  165. }
  166. break;
  167. }
  168. if(out == NULL)
  169. {
  170. out = fopen(name_out, "w");
  171. if(out == NULL)
  172. {
  173. fprintf(stderr,
  174. "%s: error: Failed opening '%s' file: %s\n",
  175. argv0,
  176. name_out,
  177. strerror(errno));
  178. err = 1;
  179. break;
  180. }
  181. }
  182. if(fwrite(line, nread, 1, out) == 0)
  183. {
  184. fprintf(stderr,
  185. "%s: error: Failed writing line to file '%s': %s\n",
  186. argv0,
  187. name_out,
  188. strerror(errno));
  189. err = 1;
  190. break;
  191. }
  192. }
  193. if(out != NULL)
  194. {
  195. if(fclose(out) < 0)
  196. {
  197. fprintf(
  198. stderr, "%s: error: Failed closing file '%s': %s\n", argv0, name_out, strerror(errno));
  199. err = 1;
  200. break;
  201. }
  202. }
  203. if(err != 0) break;
  204. }
  205. if(line_len > 0) free(line);
  206. if(name_in != NULL) fclose(in);
  207. return err;
  208. }
  209. static const char *error_opt_b_l = "%s: error: Options -b and -l are mutually exclusive\n";
  210. int
  211. main(int argc, char *argv[])
  212. {
  213. #ifdef HAS_GETOPT_LONG
  214. // Strictly for GNUisms compatibility so no long-only options
  215. // clang-format off
  216. static struct option opts[] = {
  217. {"suffix-length", required_argument, NULL, 'a'},
  218. {"bytes", required_argument, NULL, 'b'},
  219. {"lines", required_argument, NULL, 'l'},
  220. {0, 0, 0, 0},
  221. };
  222. // clang-format on
  223. // Need + as first character to get POSIX-style option parsing
  224. for(int c = -1; (c = getopt_long(argc, argv, "+:a:b:l:", opts, NULL)) != -1;)
  225. #else
  226. for(int c = -1; (c = getopt_nolong(argc, argv, ":a:b:l:")) != -1;)
  227. #endif
  228. {
  229. char *endptr = NULL;
  230. switch(c)
  231. {
  232. case 'a':
  233. suffix_len = strtoul(optarg, &endptr, 0);
  234. if(suffix_len == 0)
  235. {
  236. fprintf(stderr, "%s: error: Failed parsing '-a %s': %s\n", argv0, optarg, strerror(errno));
  237. return 1;
  238. }
  239. if(endptr != NULL && *endptr != '\0')
  240. {
  241. fprintf(stderr,
  242. "%s: error: Invalid trailing characters in '-a %s': %s\n",
  243. argv0,
  244. optarg,
  245. endptr);
  246. return 1;
  247. }
  248. break;
  249. case 'b':
  250. {
  251. if(lines != 0)
  252. {
  253. fprintf(stderr, error_opt_b_l, argv0);
  254. return 1;
  255. }
  256. unsigned long opt_b = strtoul(optarg, &endptr, 0);
  257. if(opt_b == 0)
  258. {
  259. fprintf(stderr, "%s: error: Failed parsing '-b %s': %s\n", argv0, optarg, strerror(errno));
  260. return 1;
  261. }
  262. if(endptr != NULL && *endptr != 0)
  263. if(apply_size_suffix(&opt_b, endptr) != 0) return 1;
  264. bytes = opt_b;
  265. lines = 0;
  266. break;
  267. }
  268. case 'l':
  269. if(bytes != 0)
  270. {
  271. fprintf(stderr, error_opt_b_l, argv0);
  272. return 1;
  273. }
  274. lines = strtoul(optarg, &endptr, 0);
  275. if(lines == 0)
  276. {
  277. fprintf(stderr, "%s: error: Failed parsing '-l %s': %s\n", argv0, optarg, strerror(errno));
  278. return 1;
  279. }
  280. if(endptr != NULL && *endptr != '\0')
  281. {
  282. fprintf(stderr,
  283. "%s: error: Invalid trailing characters in '-l %s': %s\n",
  284. argv0,
  285. optarg,
  286. endptr);
  287. return 1;
  288. }
  289. break;
  290. case ':':
  291. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  292. return 1;
  293. default:
  294. fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  295. return 1;
  296. }
  297. }
  298. argc -= optind;
  299. argv += optind;
  300. if(lines == 0 && bytes == 0) lines = 1000;
  301. if(argc > 2 || argc < 0)
  302. {
  303. fprintf(stderr, "%s: error: Expected 0, 1, or 2 arguments, got %d\n", argv0, argc);
  304. return 1;
  305. }
  306. else if(argc >= 1)
  307. {
  308. if(!(argv[0][0] == '-' && argv[0][1] == '\0')) name_in = argv[0];
  309. if(argc == 2) name = argv[1];
  310. }
  311. name_len = strlen(name);
  312. if(name_len + suffix_len > NAME_MAX)
  313. {
  314. fprintf(stderr,
  315. "%s: error: name(%zd bytes) + suffix_length(%zd bytes) > NAME_MAX(%d bytes)\n",
  316. argv0,
  317. name_len,
  318. suffix_len,
  319. NAME_MAX);
  320. return 1;
  321. }
  322. if(bytes != 0) return split_bytes();
  323. return split_lines();
  324. }