logo

checksrc

Check directory for potential non-source files
git clone https://anongit.hacktivis.me/git/checksrc.git
commit: 6a113122f8730963d35260e4d92ca7754fff9f2f
parent eb2baa335d05b790185a8d8752804454a4d07fd9
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Wed, 24 Sep 2025 05:55:41 +0200

check for dumps

Diffstat:

M README.md  | 12 +++++++-----
M checksrc.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md @@ -46,18 +46,20 @@ of the file looks like regular code. So `checksrc` can take much longer depending on the payloads. ## Detections +### Minor +Throws a warning but doesn't stops reading the file: + +- dump: block of 10 consecutive lines with lengths varying by less than 3 bytes (to detect hex dumps, base64, …) +- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks) + ### Major Throws an error, stops reading the file, exits unsuccessfully: - minified code: average line length of more than 100 characters (within 4KB blocks) - non-printable character (byte under 0x20 other than `\n`, `\r`, `\t`) +- 3 blocks of dump (see Minor) - (planned) string indicating generated code -### Minor -Throws a warning but doesn't stops reading the file: - -- (planned) more than 10 consecutive lines with lengths varying by less than 2 bytes (to detect hex dumps, base64, …) -- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks) ``` SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me> diff --git a/checksrc.c b/checksrc.c @@ -41,7 +41,10 @@ checkfile(const char *fname) return -1; } - size_t chars = 0, newlines = 0; + size_t lines_len[10]; + size_t chars = 0, line = 0; + size_t last_dump = 0; + int dumps = 0; for(;;) { static char buf[4096]; @@ -63,23 +66,60 @@ checkfile(const char *fname) if(buf[i] == '\t') { chars++; + lines_len[line % 10]++; continue; } if(buf[i] == '\n') { - newlines++; + if(line - last_dump > 10) + { + size_t maxdiff = 0; + for(size_t li = 1; li < 10; li++) + { + size_t diff = 0; + if(lines_len[li - 1] > lines_len[li]) + diff = lines_len[li - 1] - lines_len[li]; + else + diff = lines_len[li] - lines_len[li - 1]; + + if(diff > maxdiff) maxdiff = diff; + } + + if(maxdiff < 3) + { + last_dump = line; + printf("%s:[%zu - %zu] line length varied only by a maximum of %zu characters\n", + fname, + line - 10, + 
line, + maxdiff); + + if(++dumps == 3) + { + printf("%s: Found 3 dumps, stopping there\n", fname); + return 1; + } + } + } + + line++; + lines_len[line % 10] = 0; continue; } if(buf[i] < ' ') { - printf("binary (byte 0x%X found at position 0x%zX): %s\n", buf[i], i, fname); + printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i); return 1; } // somewhat UTF-8 centric - if(buf[i] < 0x7F) chars++; + if(buf[i] < 0x7F) + { + chars++; + lines_len[line % 10]++; + } } } @@ -89,10 +129,10 @@ checkfile(const char *fname) // Consider that lines are on average shorter than 100 character // One false-positive being formats like SVG, fine for this tool - size_t lineavg = chars / newlines; + size_t lineavg = chars / line; if(chars > 200 && lineavg > 100) { - printf("minified (%zd characters / %zd newlines = %zd): %s\n", chars, newlines, lineavg, fname); + printf("%s: minified (%zd characters / %zd newlines = %zd)\n", fname, chars, line, lineavg); return 1; }