logo

checksrc

Check directory for potential non-source files. Clone with: git clone https://anongit.hacktivis.me/git/checksrc.git

checksrc.c (6544B)


  1. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me>
  2. // SPDX-License-Identifier: MPL-2.0
  3. #define _DEFAULT_SOURCE
  4. #include <ctype.h>
  5. #include <dirent.h>
  6. #include <errno.h>
  7. #include <fcntl.h>
  8. #include <fnmatch.h>
  9. #include <limits.h> // PATH_MAX
  10. #include <stdbool.h>
  11. #include <stdint.h> // uint8_t
  12. #include <stdio.h>
  13. #include <string.h>
  14. #include <unistd.h>
  15. #if 0
  /*
   * Phrases that tend to appear in the header of machine-generated files.
   * The table sits inside an `#if 0` region and is not referenced by any
   * code yet; presumably it is meant for the "generated" string check that
   * checkfile() still lists as a TODO.
   */
  static const char *generated[] = {
      "do not modify",
      "do not edit",
      "was generated",
      "is generated",
      "generated by",
      "generated from",
      "generated using",
      "automatically generated",
      "generated automatically",
      "machine generated",
      "autogenerated",
      /* The trailing comma matters: without it this literal is concatenated
       * with the next one into "generated codeproduced by". */
      "generated code",
      "produced by", // makeinfo
      "produced from",
      "produced using",
  };
  33. // Check what yacc (original), oyacc, heirloom yacc & lex produce
  34. // Maybe this table should be passed to lex for efficiency
  35. #endif
  /* Set by the -v option; when true, checkfile() also reports files that passed. */
  bool verbose = false;

  /*
   * Upper bound on the number of -e patterns. EXCLUDE_MAX is the single source
   * of truth: the array dimension and the string used in the error message are
   * both derived from it, so they cannot drift apart.
   * EXCLUDE_MAX must stay a plain integer literal for the stringification to
   * yield a readable number.
   */
  #define EXCLUDE_MAX 400
  #define CHECKSRC_STRINGIFY_(x) #x
  #define CHECKSRC_STRINGIFY(x) CHECKSRC_STRINGIFY_(x)
  #define EXCLUDE_MAX_STR CHECKSRC_STRINGIFY(EXCLUDE_MAX)

  /* fnmatch() patterns collected from -e options (pointers into argv). */
  static const char *exclude[EXCLUDE_MAX];
  /* Number of valid entries in exclude[]. */
  static int exclude_len;
  41. static int
  42. checkfile(const char *fname)
  43. {
  44. int fd = open(fname, O_RDONLY | O_NOCTTY);
  45. if(fd < 0)
  46. {
  47. fprintf(stderr, "checksrc: error: Failed opening file '%s': %s\n", fname, strerror(errno));
  48. return -1;
  49. }
  50. size_t lines_len[10];
  51. size_t chars = 0, line = 0;
  52. size_t last_dump = 0;
  53. unsigned int dumps = 0;
  54. off_t pos = 0;
  55. for(;;)
  56. {
  57. static uint8_t buf[4096];
  58. unsigned int puncts = 0, numbers = 0, letters = 0;
  59. int nread = read(fd, &buf, 4096);
  60. if(nread == 0) break;
  61. if(nread < 0)
  62. {
  63. fprintf(
  64. stderr, "checksrc: error: Failed reading from file '%s': %s\n", fname, strerror(errno));
  65. close(fd);
  66. return -1;
  67. }
  68. for(size_t i = 0; i < nread; i++)
  69. {
  70. if(buf[i] == '\r') continue;
  71. if(buf[i] == '\t')
  72. {
  73. chars++;
  74. lines_len[line % 10]++;
  75. continue;
  76. }
  77. if(buf[i] == '\n')
  78. {
  79. if(line - last_dump > 10)
  80. {
  81. size_t maxdiff = 0;
  82. for(size_t li = 1; li < 10; li++)
  83. {
  84. size_t diff = 0;
  85. if(lines_len[li - 1] > lines_len[li])
  86. diff = lines_len[li - 1] - lines_len[li];
  87. else
  88. diff = lines_len[li] - lines_len[li - 1];
  89. if(diff > maxdiff) maxdiff = diff;
  90. }
  91. if(maxdiff < 3)
  92. {
  93. last_dump = line;
  94. printf("%s:[%zu - %zu] line length varied only by a maximum of %zu characters\n",
  95. fname,
  96. line - 10,
  97. line,
  98. maxdiff);
  99. if(++dumps == 3)
  100. {
  101. printf("%s: Found 3 dumps, stopping there\n", fname);
  102. close(fd);
  103. return 1;
  104. }
  105. }
  106. }
  107. line++;
  108. lines_len[line % 10] = 0;
  109. continue;
  110. }
  111. /* ('\e', ESC) */
  112. if(buf[i] == 0x1B) continue;
  113. /* 0x0 and obscure control characters. With \r, \t, \n, \e taken care off earlier */
  114. if(buf[i] < ' ')
  115. {
  116. printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i);
  117. close(fd);
  118. return 1;
  119. }
  120. // somewhat UTF-8 centric
  121. if(buf[i] < 0x7F)
  122. {
  123. chars++;
  124. lines_len[line % 10]++;
  125. if(buf[i] == ' ') continue;
  126. if(isdigit(buf[i]))
  127. numbers++;
  128. else if(isalpha(buf[i]))
  129. letters++;
  130. else
  131. puncts++;
  132. }
  133. }
  134. if(puncts + numbers > letters)
  135. {
  136. printf("%s (block 0x%lX to 0x%lX): Got more punctuation(%d) + numbers(%d) than letters(%d)\n",
  137. fname,
  138. pos,
  139. pos + nread,
  140. puncts,
  141. numbers,
  142. letters);
  143. }
  144. pos += nread;
  145. }
  146. // TODO: Check for a "generated" string
  147. close(fd);
  148. if(chars > 200)
  149. {
  150. size_t lineavg = chars;
  151. if(line > 0) lineavg /= line;
  152. // Consider that lines are on average shorter than 100 character
  153. // One false-positive being formats like SVG, fine for this tool
  154. if(lineavg > 100)
  155. {
  156. printf("%s: minified (%zd characters / %zd newlines = %zd)\n", fname, chars, line, lineavg);
  157. return 1;
  158. }
  159. }
  160. if(verbose) printf("%s: OK\n", fname);
  161. return 0;
  162. }
  163. static int
  164. exclude_match(char *path)
  165. {
  166. for(int ei = 0; ei < exclude_len; ei++)
  167. if(fnmatch(exclude[ei], path, 0) == 0) return 0;
  168. return 1;
  169. }
  /*
   * Recursively scan `dirname`: regular files go through checkfile() and
   * sub-directories through checkdir(). Entries of any other type, "." and
   * "..", and paths matched by exclude_match() are skipped.
   *
   * Returns 0 when nothing was flagged and 1 on a fatal error (directory
   * cannot be opened or read, path cannot be built, a child reported an
   * error). When a child returns a positive value, 1 + that value is
   * returned, so the result grows with nesting depth.
   * NOTE(review): that growth looks accidental, but it is kept unchanged
   * because callers may rely on the exit status.
   *
   * NOTE(review): entries whose d_type is neither DT_DIR nor DT_REG
   * (including DT_UNKNOWN) are skipped without a message.
   */
  static int
  checkdir(const char *dirname)
  {
  #define DENTSSIZ 10240
      int ret = 0;

      int dirfd = open(dirname, O_RDONLY | O_DIRECTORY);
      if(dirfd < 0)
      {
          fprintf(
              stderr, "checksrc: error: Failed opening directory '%s': %s\n", dirname, strerror(errno));
          return 1;
      }

      for(;;)
      {
          char buf[DENTSSIZ];

          // Sadly posix_getdents is still a bit too recent
          ssize_t nread = getdents(dirfd, buf, DENTSSIZ);
          if(nread == 0) break;
          if(nread < 0)
          {
              fprintf(
                  stderr, "checksrc: error: Failed reading directory '%s': %s\n", dirname, strerror(errno));
              ret = 1;
              goto out;
          }

          for(ssize_t bpos = 0; bpos < nread;)
          {
              struct dirent *dent = (void *)&buf[bpos];
              bpos += dent->d_reclen;

              if(strcmp(dent->d_name, ".") == 0) continue;
              if(strcmp(dent->d_name, "..") == 0) continue;

              if(dent->d_type != DT_DIR && dent->d_type != DT_REG) continue;

              char path[PATH_MAX];
              int plen = snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name);

              /* snprintf() reports truncation by returning a length that does
               * not fit; treat that like a failure instead of scanning a
               * silently shortened path. */
              if(plen < 0 || (size_t)plen >= sizeof(path))
              {
                  int join_errno = (plen < 0) ? errno : ENAMETOOLONG;

                  fprintf(stderr,
                          "checksrc: error: Failed joining path in directory '%s': %s\n",
                          dirname,
                          strerror(join_errno));
                  ret = 1;
                  goto out;
              }

              if(exclude_match(path) == 0) continue;

              int err = (dent->d_type == DT_DIR) ? checkdir(path) : checkfile(path);

              if(err < 0)
              {
                  ret = 1;
                  goto out;
              }
              if(err > 0) ret = 1 + err;
          }
      }

  out:
      /* Single exit point so the directory descriptor is released on every
       * path, including the early error exits. */
      close(dirfd);
      return ret;
  }
  /* Write the one-line command synopsis to stderr. */
  static void
  usage(void)
  {
      static const char synopsis[] = "Usage: checksrc [-v] [-d workdir] [-e excluded_path ...]\n";

      fputs(synopsis, stderr);
  }
  236. int
  237. main(int argc, char *argv[])
  238. {
  239. char *workdir = NULL;
  240. for(int c = -1; (c = getopt(argc, argv, "vd:e:")) != -1;)
  241. {
  242. switch(c)
  243. {
  244. case 'v':
  245. verbose = true;
  246. break;
  247. case 'd':
  248. if(workdir)
  249. {
  250. fputs("checksrc: error: Option -d can only be passed once\n", stderr);
  251. usage();
  252. return 1;
  253. }
  254. workdir = optarg;
  255. break;
  256. case 'e':
  257. if(exclude_len == EXCLUDE_MAX)
  258. {
  259. fputs("checksrc: error: Can only use '-e' option " EXCLUDE_MAX_STR " times\n", stderr);
  260. return 1;
  261. }
  262. exclude[exclude_len++] = optarg;
  263. break;
  264. }
  265. }
  266. argc -= optind;
  267. argv += optind;
  268. if(argc > 0)
  269. {
  270. fprintf(stderr, "checksrc: error: Expected 0 arguments, got %d\n", argc);
  271. return 1;
  272. }
  273. if(workdir)
  274. {
  275. if(chdir(workdir) != 0)
  276. {
  277. fprintf(stderr,
  278. "checksrc: error: Failed changing into directory '%s': %s\n",
  279. workdir,
  280. strerror(errno));
  281. return 1;
  282. }
  283. }
  284. return checkdir(".");
  285. }