logo

utils-std

Collection of commonly available Unix tools.

Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

split.c (7119B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h"
  6. #include "../libutils/fs.h" // auto_file_copy
  7. #include "../libutils/getopt_nolong.h"
  8. #include "../libutils/truncation.h" // apply_size_suffix
  9. #include <errno.h>
  10. #include <fcntl.h> // open
  11. #include <limits.h> // NAME_MAX
  12. #include <stdio.h> // fprintf
  13. #include <stdlib.h> // strtoul
  14. #include <string.h> // strerror
  15. #include <sys/stat.h> // fstat
  16. #include <unistd.h> // getopt
  // Program name printed at the start of every diagnostic
  const char *argv0 = "split";

  // Prefix of the output file names; main() replaces it with the second operand when given
  const char *name = "x";
  // Length of `name` in bytes; main() recomputes it with strlen() after operand parsing
  size_t name_len = 1;

  // Number of suffix letters appended to `name` (option -a)
  size_t suffix_len = 2;
  // Bytes per output file (option -b); 0 means "not requested"
  size_t bytes = 0;
  // Lines per output file (option -l); 0 means "not requested"
  size_t lines = 0;

  // Input path taken from the first operand; stays NULL to read standard input
  char *name_in = NULL;
  22. static int
  23. base26(int id, char *str)
  24. {
  25. memcpy(str, name, name_len);
  26. memset(str + name_len, 'a', suffix_len);
  27. size_t id_p = name_len + suffix_len;
  28. do
  29. {
  30. str[id_p--] = 'a' + (id % 26);
  31. id /= 26;
  32. } while(id > 0 && id_p > name_len);
  33. if(id_p <= name_len)
  34. {
  35. fprintf(stderr,
  36. "%s: error: Failed representing %d into suffix of length %zu\n",
  37. argv0,
  38. id,
  39. suffix_len);
  40. return -1;
  41. }
  42. return 0;
  43. }
  44. static int
  45. split_bytes(void)
  46. {
  47. int fd_in = STDIN_FILENO;
  48. if(name_in != NULL)
  49. {
  50. fd_in = open(name_in, O_RDONLY | O_NOCTTY);
  51. if(fd_in < 0)
  52. {
  53. fprintf(stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_in, strerror(errno));
  54. return 1;
  55. }
  56. }
  57. struct stat fd_in_stat;
  58. if(fstat(fd_in, &fd_in_stat) != 0)
  59. {
  60. fprintf(stderr,
  61. "%s: error: Failed getting status from file '%s': %s",
  62. argv0,
  63. name_in,
  64. strerror(errno));
  65. close(fd_in);
  66. return 1;
  67. }
  68. posix_fadvise(fd_in, 0, 0, POSIX_FADV_SEQUENTIAL);
  69. errno = 0;
  70. int err = 0;
  71. off_t wrote = 0;
  72. int split_id = 0;
  73. while(wrote < fd_in_stat.st_size)
  74. {
  75. char name_out[NAME_MAX] = "";
  76. if(base26(split_id++, name_out) < 0) return 1;
  77. int fd_out = open(name_out, O_WRONLY | O_NOCTTY | O_CREAT, 0644);
  78. if(fd_out < 0)
  79. {
  80. fprintf(
  81. stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_out, strerror(errno));
  82. err = 1;
  83. break;
  84. }
  85. posix_fadvise(fd_out, 0, 0, POSIX_FADV_SEQUENTIAL);
  86. errno = 0;
  87. int ret = auto_file_copy(fd_in, fd_out, bytes, 0);
  88. if(ret < 0)
  89. {
  90. fprintf(stderr,
  91. "%s: error: Failed copying from file '%s' to file '%s': %s\n",
  92. argv0,
  93. name_in ? name_in : "<stdin>",
  94. name_out,
  95. strerror(errno));
  96. close(fd_out);
  97. err = 1;
  98. break;
  99. }
  100. wrote += ret;
  101. if(close(fd_out) < 0)
  102. {
  103. fprintf(
  104. stderr, "%s: error: Failed closing file '%s': %s\n", argv0, name_out, strerror(errno));
  105. err = 1;
  106. break;
  107. }
  108. }
  109. if(name_in != NULL) close(fd_in);
  110. return err;
  111. }
  112. static int
  113. split_lines(void)
  114. {
  115. FILE *in = stdin;
  116. if(name_in != NULL)
  117. {
  118. in = fopen(name_in, "r");
  119. if(in == NULL)
  120. {
  121. fprintf(stderr, "%s: error: Failed opening '%s' file: %s\n", argv0, name_in, strerror(errno));
  122. return 1;
  123. }
  124. }
  125. int err = 0;
  126. char *line = NULL;
  127. size_t line_len = 0;
  128. int split_id = 0;
  129. while(true)
  130. {
  131. if(feof(in)) break;
  132. if(ferror(in))
  133. {
  134. fprintf(stderr,
  135. "%s: error: Failed reading line from file '%s': %s\n",
  136. argv0,
  137. name_in,
  138. strerror(errno));
  139. err = 1;
  140. break;
  141. }
  142. char name_out[NAME_MAX] = "";
  143. if(base26(split_id++, name_out) < 0)
  144. {
  145. err = 1;
  146. break;
  147. }
  148. FILE *out = NULL;
  149. for(size_t i = 0; i < lines; i++)
  150. {
  151. ssize_t nread = getline(&line, &line_len, in);
  152. if(nread < 0)
  153. {
  154. if(errno != 0)
  155. {
  156. fprintf(stderr,
  157. "%s: error: Failed reading line from file '%s': %s\n",
  158. argv0,
  159. name_in,
  160. strerror(errno));
  161. err = 1;
  162. }
  163. break;
  164. }
  165. if(out == NULL)
  166. {
  167. out = fopen(name_out, "w");
  168. if(out == NULL)
  169. {
  170. fprintf(stderr,
  171. "%s: error: Failed opening '%s' file: %s\n",
  172. argv0,
  173. name_out,
  174. strerror(errno));
  175. err = 1;
  176. break;
  177. }
  178. }
  179. if(fwrite(line, nread, 1, out) == 0)
  180. {
  181. fprintf(stderr,
  182. "%s: error: Failed writing line to file '%s': %s\n",
  183. argv0,
  184. name_out,
  185. strerror(errno));
  186. err = 1;
  187. break;
  188. }
  189. }
  190. if(out != NULL)
  191. {
  192. if(fclose(out) < 0)
  193. {
  194. fprintf(
  195. stderr, "%s: error: Failed closing file '%s': %s\n", argv0, name_out, strerror(errno));
  196. err = 1;
  197. break;
  198. }
  199. }
  200. if(err != 0) break;
  201. }
  202. if(line_len > 0) free(line);
  203. if(name_in != NULL) fclose(in);
  204. return err;
  205. }
  206. static const char *error_opt_b_l = "%s: error: Options -b and -l are mutually exclusive\n";
  207. int
  208. main(int argc, char *argv[])
  209. {
  210. for(int c = -1; (c = getopt_nolong(argc, argv, ":a:b:l:")) != -1;)
  211. {
  212. char *endptr = NULL;
  213. switch(c)
  214. {
  215. case 'a':
  216. suffix_len = strtoul(optarg, &endptr, 0);
  217. if(suffix_len == 0)
  218. {
  219. fprintf(stderr, "%s: error: Failed parsing '-a %s': %s\n", argv0, optarg, strerror(errno));
  220. return 1;
  221. }
  222. if(endptr != NULL && *endptr != '\0')
  223. {
  224. fprintf(stderr,
  225. "%s: error: Invalid trailing characters in '-a %s': %s\n",
  226. argv0,
  227. optarg,
  228. endptr);
  229. return 1;
  230. }
  231. break;
  232. case 'b':
  233. {
  234. if(lines != 0)
  235. {
  236. fprintf(stderr, error_opt_b_l, argv0);
  237. return 1;
  238. }
  239. unsigned long opt_b = strtoul(optarg, &endptr, 0);
  240. if(opt_b == 0)
  241. {
  242. fprintf(stderr, "%s: error: Failed parsing '-b %s': %s\n", argv0, optarg, strerror(errno));
  243. return 1;
  244. }
  245. if(endptr != NULL && *endptr != 0)
  246. if(apply_size_suffix(&opt_b, endptr) != 0) return 1;
  247. bytes = opt_b;
  248. lines = 0;
  249. break;
  250. }
  251. case 'l':
  252. if(bytes != 0)
  253. {
  254. fprintf(stderr, error_opt_b_l, argv0);
  255. return 1;
  256. }
  257. lines = strtoul(optarg, &endptr, 0);
  258. if(lines == 0)
  259. {
  260. fprintf(stderr, "%s: error: Failed parsing '-l %s': %s\n", argv0, optarg, strerror(errno));
  261. return 1;
  262. }
  263. if(endptr != NULL && *endptr != '\0')
  264. {
  265. fprintf(stderr,
  266. "%s: error: Invalid trailing characters in '-l %s': %s\n",
  267. argv0,
  268. optarg,
  269. endptr);
  270. return 1;
  271. }
  272. break;
  273. case ':':
  274. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  275. return 1;
  276. default:
  277. fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  278. return 1;
  279. }
  280. }
  281. argc -= optind;
  282. argv += optind;
  283. if(lines == 0 && bytes == 0) lines = 1000;
  284. if(argc > 2 || argc < 0)
  285. {
  286. fprintf(stderr, "%s: error: Expected 0, 1, or 2 arguments, got %d\n", argv0, argc);
  287. return 1;
  288. }
  289. else if(argc >= 1)
  290. {
  291. if(!(argv[0][0] == '-' && argv[0][1] == '\0')) name_in = argv[0];
  292. if(argc == 2) name = argv[1];
  293. }
  294. name_len = strlen(name);
  295. if(name_len + suffix_len > NAME_MAX)
  296. {
  297. fprintf(stderr,
  298. "%s: error: name(%zd bytes) + suffix_length(%zd bytes) > NAME_MAX(%d bytes)\n",
  299. argv0,
  300. name_len,
  301. suffix_len,
  302. NAME_MAX);
  303. return 1;
  304. }
  305. if(bytes != 0) return split_bytes();
  306. return split_lines();
  307. }