logo

checksrc

Check directory for potential non-source files
git clone https://anongit.hacktivis.me/git/checksrc.git
commit: 8a2982b7c9109eb3a0336f958015a95cd314aeda
parent 2979f91180c081b6e2e8e1f591ba21120b4a434d
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Wed, 24 Sep 2025 07:20:38 +0200

check amount of punctuation+numbers vs. letters

Diffstat:

M README.md  |  2 +-
M checksrc.c | 40 ++++++++++++++++++++++++++++++++++------
2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
@@ -50,7 +50,7 @@ So `checksrc` can take much longer depending on the payloads.
 Throws a warning but doesn't stops reading the file:
 
 - dump: block of 10 consecutive lines with lengths varying by less than 3 bytes (to detect hex dumps, base64, …)
-- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks)
+- more punctuation, symbols, and numbers than letters (within 4KB blocks)
 
 ### Major
 Throws an error, stops reading the file, exits unsuccessfully:
diff --git a/checksrc.c b/checksrc.c
@@ -1,15 +1,16 @@
 // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me>
 // SPDX-License-Identifier: MPL-2.0
 #define _DEFAULT_SOURCE
+#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h> // PATH_MAX
+#include <stdbool.h>
+#include <stdint.h> // uint8_t
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <stdbool.h>
-#include <stdint.h> // uint8_t
 
 #if 0
 static const char *generated[] = {
@@ -48,10 +49,12 @@ checkfile(const char *fname)
 	size_t lines_len[10];
 	size_t chars = 0, line = 0;
 	size_t last_dump = 0;
-	int dumps = 0;
+	unsigned int dumps = 0;
+	off_t pos = 0;
 	for(;;)
 	{
 		static uint8_t buf[4096];
+		unsigned int puncts = 0, numbers = 0, letters = 0;
 		int nread = read(fd, &buf, 4096);
 		if(nread == 0) break;
 
@@ -113,8 +116,11 @@ checkfile(const char *fname)
 				continue;
 			}
 
-			/* control characters. Except 0x1B ('^[', ESC). With \n, \r taken care off earlier */
-			if(buf[i] < ' ' && buf[i] != 0x1B)
+			/* ('\e', ESC) */
+			if(buf[i] == 0x1B) continue;
+
+			/* control characters. With \r, \t, \n, \e taken care off earlier */
+			if(buf[i] < ' ')
 			{
 				printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i);
 				close(fd);
@@ -126,8 +132,30 @@ checkfile(const char *fname)
 			{
 				chars++;
 				lines_len[line % 10]++;
+
+				if(buf[i] == ' ') continue;
+
+				if(isdigit(buf[i]))
+					numbers++;
+				else if(isalpha(buf[i]))
+					letters++;
+				else
+					puncts++;
 			}
 		}
+
+		if(puncts + numbers > letters)
+		{
+			printf("%s (block 0x%lX to 0x%lX): Got more punctuation(%d) + numbers(%d) than letters(%d)\n",
+			       fname,
+			       pos,
+			       pos + nread,
+			       puncts,
+			       numbers,
+			       letters);
+		}
+
+		pos += nread;
 	}
 
 	// TODO: Check for a "generated" string
@@ -214,7 +242,7 @@ checkdir(const char *dirname)
 				continue;
 			}
 			if(err < 0) return 1;
-			if(err > 0) ret = 1+err;
+			if(err > 0) ret = 1 + err;
 		}
 	}
 }