checksrc.c (6544B)
- // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me>
- // SPDX-License-Identifier: MPL-2.0
- #define _DEFAULT_SOURCE
- #include <ctype.h>
- #include <dirent.h>
- #include <errno.h>
- #include <fcntl.h>
- #include <fnmatch.h>
- #include <limits.h> // PATH_MAX
- #include <stdbool.h>
- #include <stdint.h> // uint8_t
- #include <stdio.h>
- #include <string.h>
- #include <unistd.h>
#if 0
// Phrases that commonly appear in the header of machine-generated files.
// Compiled out for now: nothing in this file consumes the table yet.
static const char *generated[] = {
	"do not modify",
	"do not edit",
	"was generated",
	"is generated",
	"generated by",
	"generated from",
	"generated using",
	"automatically generated",
	"generated automatically",
	"machine generated",
	"autogenerated",
	"generated code", // the comma matters: without it this entry merges with the next one
	"produced by", // makeinfo
	"produced from",
	"produced using",
};
// Check what yacc (original), oyacc, heirloom yacc & lex produces
// Maybe this table should be passed to lex for efficiency
#endif
// When true, files that pass every check are reported as "<file>: OK".
bool verbose = false;

// Maximum number of -e patterns that can be stored.
// EXCLUDE_MAX_STR is produced from EXCLUDE_MAX by stringification so the
// number shown in error messages can never drift from the real limit.
#define EXCLUDE_MAX 400
#define EXCLUDE_STRINGIFY_(x) #x
#define EXCLUDE_STRINGIFY(x) EXCLUDE_STRINGIFY_(x)
#define EXCLUDE_MAX_STR EXCLUDE_STRINGIFY(EXCLUDE_MAX)

// Patterns collected from -e; the pointers alias the argv strings.
static const char *exclude[EXCLUDE_MAX];
// Number of valid entries at the start of exclude[].
static int exclude_len;
- static int
- checkfile(const char *fname)
- {
- int fd = open(fname, O_RDONLY | O_NOCTTY);
- if(fd < 0)
- {
- fprintf(stderr, "checksrc: error: Failed opening file '%s': %s\n", fname, strerror(errno));
- return -1;
- }
- size_t lines_len[10];
- size_t chars = 0, line = 0;
- size_t last_dump = 0;
- unsigned int dumps = 0;
- off_t pos = 0;
- for(;;)
- {
- static uint8_t buf[4096];
- unsigned int puncts = 0, numbers = 0, letters = 0;
- int nread = read(fd, &buf, 4096);
- if(nread == 0) break;
- if(nread < 0)
- {
- fprintf(
- stderr, "checksrc: error: Failed reading from file '%s': %s\n", fname, strerror(errno));
- close(fd);
- return -1;
- }
- for(size_t i = 0; i < nread; i++)
- {
- if(buf[i] == '\r') continue;
- if(buf[i] == '\t')
- {
- chars++;
- lines_len[line % 10]++;
- continue;
- }
- if(buf[i] == '\n')
- {
- if(line - last_dump > 10)
- {
- size_t maxdiff = 0;
- for(size_t li = 1; li < 10; li++)
- {
- size_t diff = 0;
- if(lines_len[li - 1] > lines_len[li])
- diff = lines_len[li - 1] - lines_len[li];
- else
- diff = lines_len[li] - lines_len[li - 1];
- if(diff > maxdiff) maxdiff = diff;
- }
- if(maxdiff < 3)
- {
- last_dump = line;
- printf("%s:[%zu - %zu] line length varied only by a maximum of %zu characters\n",
- fname,
- line - 10,
- line,
- maxdiff);
- if(++dumps == 3)
- {
- printf("%s: Found 3 dumps, stopping there\n", fname);
- close(fd);
- return 1;
- }
- }
- }
- line++;
- lines_len[line % 10] = 0;
- continue;
- }
- /* ('\e', ESC) */
- if(buf[i] == 0x1B) continue;
- /* 0x0 and obscure control characters. With \r, \t, \n, \e taken care off earlier */
- if(buf[i] < ' ')
- {
- printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i);
- close(fd);
- return 1;
- }
- // somewhat UTF-8 centric
- if(buf[i] < 0x7F)
- {
- chars++;
- lines_len[line % 10]++;
- if(buf[i] == ' ') continue;
- if(isdigit(buf[i]))
- numbers++;
- else if(isalpha(buf[i]))
- letters++;
- else
- puncts++;
- }
- }
- if(puncts + numbers > letters)
- {
- printf("%s (block 0x%lX to 0x%lX): Got more punctuation(%d) + numbers(%d) than letters(%d)\n",
- fname,
- pos,
- pos + nread,
- puncts,
- numbers,
- letters);
- }
- pos += nread;
- }
- // TODO: Check for a "generated" string
- close(fd);
- if(chars > 200)
- {
- size_t lineavg = chars;
- if(line > 0) lineavg /= line;
- // Consider that lines are on average shorter than 100 character
- // One false-positive being formats like SVG, fine for this tool
- if(lineavg > 100)
- {
- printf("%s: minified (%zd characters / %zd newlines = %zd)\n", fname, chars, line, lineavg);
- return 1;
- }
- }
- if(verbose) printf("%s: OK\n", fname);
- return 0;
- }
- static int
- exclude_match(char *path)
- {
- for(int ei = 0; ei < exclude_len; ei++)
- if(fnmatch(exclude[ei], path, 0) == 0) return 0;
- return 1;
- }
// Recursively check every regular file below dirname.
//
// Entries are read with getdents(); "." and ".." are skipped, as is any entry
// whose d_type is neither DT_DIR nor DT_REG. Each remaining entry is joined to
// dirname with a '/' and skipped when exclude_match() reports a match.
// Directories recurse into checkdir(), regular files go to checkfile().
//
// Returns 0 when nothing was reported. Returns 1 when the directory could not
// be opened or read, when a path could not be built, or when checkfile()
// failed with a negative value. Otherwise, when a child returned a positive
// value, the result is 1 plus the value of the last such child.
static int
checkdir(const char *dirname)
{
	int ret = 0;

	// Named dfd so that the dirfd() function from <dirent.h> is not shadowed
	int dfd = open(dirname, O_RDONLY | O_DIRECTORY);
	if(dfd < 0)
	{
		fprintf(
		    stderr, "checksrc: error: Failed opening directory '%s': %s\n", dirname, strerror(errno));
		return 1;
	}

	for(;;)
	{
#define DENTSSIZ 10240
		char buf[DENTSSIZ];

		// Sadly posix_getdents is still a bit too recent
		ssize_t nread = getdents(dfd, buf, DENTSSIZ);
		if(nread == 0) break;
		if(nread < 0)
		{
			fprintf(
			    stderr, "checksrc: error: Failed reading directory '%s': %s\n", dirname, strerror(errno));
			ret = 1;
			goto done;
		}

		for(ssize_t bpos = 0; bpos < nread;)
		{
			struct dirent *dent = (void *)&buf[bpos];
			// Advance first so that every `continue` below moves to the next record
			bpos += dent->d_reclen;

			if(strcmp(dent->d_name, ".") == 0) continue;
			if(strcmp(dent->d_name, "..") == 0) continue;

			if(dent->d_type != DT_DIR && dent->d_type != DT_REG) continue;

			char path[PATH_MAX] = "";
			int plen = snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name);
			if(plen >= 0 && (size_t)plen >= sizeof(path))
			{
				// Truncated output would make us check (or exclude) the wrong path
				errno = ENAMETOOLONG;
				plen = -1;
			}
			if(plen < 0)
			{
				fprintf(stderr,
				        "checksrc: error: Failed joining path in directory '%s': %s\n",
				        dirname,
				        strerror(errno));
				ret = 1;
				goto done;
			}

			if(exclude_match(path) == 0) continue;

			int err = 0;
			switch(dent->d_type)
			{
			case DT_DIR:
				err = checkdir(path);
				break;
			case DT_REG:
				err = checkfile(path);
				break;
			}

			if(err < 0)
			{
				ret = 1;
				goto done;
			}
			if(err > 0) ret = 1 + err;
		}
	}

done:
	// Single exit point so the directory descriptor is released on every path
	close(dfd);
	return ret;
}
// Write the one-line command synopsis to stderr.
static void
usage(void)
{
	static const char synopsis[] = "Usage: checksrc [-v] [-d workdir] [-e excluded_path ...]\n";

	fprintf(stderr, "%s", synopsis);
}
- int
- main(int argc, char *argv[])
- {
- char *workdir = NULL;
- for(int c = -1; (c = getopt(argc, argv, "vd:e:")) != -1;)
- {
- switch(c)
- {
- case 'v':
- verbose = true;
- break;
- case 'd':
- if(workdir)
- {
- fputs("checksrc: error: Option -d can only be passed once\n", stderr);
- usage();
- return 1;
- }
- workdir = optarg;
- break;
- case 'e':
- if(exclude_len == EXCLUDE_MAX)
- {
- fputs("checksrc: error: Can only use '-e' option " EXCLUDE_MAX_STR " times\n", stderr);
- return 1;
- }
- exclude[exclude_len++] = optarg;
- break;
- }
- }
- argc -= optind;
- argv += optind;
- if(argc > 0)
- {
- fprintf(stderr, "checksrc: error: Expected 0 arguments, got %d\n", argc);
- return 1;
- }
- if(workdir)
- {
- if(chdir(workdir) != 0)
- {
- fprintf(stderr,
- "checksrc: error: Failed changing into directory '%s': %s\n",
- workdir,
- strerror(errno));
- return 1;
- }
- }
- return checkdir(".");
- }