commit: 6a113122f8730963d35260e4d92ca7754fff9f2f
parent eb2baa335d05b790185a8d8752804454a4d07fd9
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Wed, 24 Sep 2025 05:55:41 +0200
check for dumps
Diffstat:
2 files changed, 53 insertions(+), 11 deletions(-)
diff --git a/README.md b/README.md
@@ -46,18 +46,20 @@ of the file looks like regular code.
So `checksrc` can take much longer depending on the payloads.
## Detections
+### Minor
+Throws a warning but doesn't stop reading the file:
+
+- dump: block of 10 consecutive lines with lengths varying by less than 3 bytes (to detect hex dumps, base64, …)
+- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks)
+
### Major
Throws an error, stops reading the file, exits unsuccessfully:
- minified code: average line length of more than 100 characters (within 4KB blocks)
- non-printable character (byte under 0x20 other than `\n`, `\r`, `\t`)
+- 3 blocks of dump (see Minor)
- (planned) string indicating generated code
-### Minor
-Throws a warning but doesn't stops reading the file:
-
-- (planned) more than 10 consecutive lines with lengths varying by less than 2 bytes (to detect hex dumps, base64, …)
-- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks)
```
SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me>
diff --git a/checksrc.c b/checksrc.c
@@ -41,7 +41,10 @@ checkfile(const char *fname)
return -1;
}
- size_t chars = 0, newlines = 0;
+ size_t lines_len[10];
+ size_t chars = 0, line = 0;
+ size_t last_dump = 0;
+ int dumps = 0;
for(;;)
{
static char buf[4096];
@@ -63,23 +66,60 @@ checkfile(const char *fname)
if(buf[i] == '\t')
{
chars++;
+ lines_len[line % 10]++;
continue;
}
if(buf[i] == '\n')
{
- newlines++;
+ if(line - last_dump > 10)
+ {
+ size_t maxdiff = 0;
+ for(size_t li = 1; li < 10; li++)
+ {
+ size_t diff = 0;
+ if(lines_len[li - 1] > lines_len[li])
+ diff = lines_len[li - 1] - lines_len[li];
+ else
+ diff = lines_len[li] - lines_len[li - 1];
+
+ if(diff > maxdiff) maxdiff = diff;
+ }
+
+ if(maxdiff < 3)
+ {
+ last_dump = line;
+ printf("%s:[%zu - %zu] line length varied only by a maximum of %zu characters\n",
+ fname,
+ line - 10,
+ line,
+ maxdiff);
+
+ if(++dumps == 3)
+ {
+ printf("%s: Found 3 dumps, stopping there\n", fname);
+ return 1;
+ }
+ }
+ }
+
+ line++;
+ lines_len[line % 10] = 0;
continue;
}
if(buf[i] < ' ')
{
- printf("binary (byte 0x%X found at position 0x%zX): %s\n", buf[i], i, fname);
+ printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i);
return 1;
}
// somewhat UTF-8 centric
- if(buf[i] < 0x7F) chars++;
+ if(buf[i] < 0x7F)
+ {
+ chars++;
+ lines_len[line % 10]++;
+ }
}
}
@@ -89,10 +129,10 @@ checkfile(const char *fname)
// Consider that lines are on average shorter than 100 character
// One false-positive being formats like SVG, fine for this tool
- size_t lineavg = chars / newlines;
+ size_t lineavg = chars / line;
if(chars > 200 && lineavg > 100)
{
- printf("minified (%zd characters / %zd newlines = %zd): %s\n", chars, newlines, lineavg, fname);
+ printf("%s: minified (%zd characters / %zd newlines = %zd)\n", fname, chars, line, lineavg);
return 1;
}