commit: 8a2982b7c9109eb3a0336f958015a95cd314aeda
parent 2979f91180c081b6e2e8e1f591ba21120b4a434d
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Wed, 24 Sep 2025 07:20:38 +0200
check amount of punctuation+numbers vs. letters
Diffstat:
2 files changed, 35 insertions(+), 7 deletions(-)
diff --git a/README.md b/README.md
@@ -50,7 +50,7 @@ So `checksrc` can take much longer depending on the payloads.
Throws a warning but doesn't stop reading the file:
- dump: block of 10 consecutive lines with lengths varying by less than 3 bytes (to detect hex dumps, base64, …)
-- (planned) more punctuation, symbols, and numbers than letters [a-zA-Z\x7C-\xFF] (within 4KB blocks)
+- more punctuation, symbols, and numbers than letters (within 4KB blocks)
### Major
Throws an error, stops reading the file, exits unsuccessfully:
diff --git a/checksrc.c b/checksrc.c
@@ -1,15 +1,16 @@
// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+checksrc@hacktivis.me>
// SPDX-License-Identifier: MPL-2.0
#define _DEFAULT_SOURCE
+#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h> // PATH_MAX
+#include <stdbool.h>
+#include <stdint.h> // uint8_t
#include <stdio.h>
#include <string.h>
#include <unistd.h>
-#include <stdbool.h>
-#include <stdint.h> // uint8_t
#if 0
static const char *generated[] = {
@@ -48,10 +49,12 @@ checkfile(const char *fname)
size_t lines_len[10];
size_t chars = 0, line = 0;
size_t last_dump = 0;
- int dumps = 0;
+ unsigned int dumps = 0;
+ off_t pos = 0;
for(;;)
{
static uint8_t buf[4096];
+ unsigned int puncts = 0, numbers = 0, letters = 0;
int nread = read(fd, &buf, 4096);
if(nread == 0) break;
@@ -113,8 +116,11 @@ checkfile(const char *fname)
continue;
}
- /* control characters. Except 0x1B ('^[', ESC). With \n, \r taken care off earlier */
- if(buf[i] < ' ' && buf[i] != 0x1B)
+ /* ('\e', ESC) */
+ if(buf[i] == 0x1B) continue;
+
+ /* control characters. With \r, \t, \n, \e taken care of earlier */
+ if(buf[i] < ' ')
{
printf("%s: binary (byte 0x%X found at position 0x%zX)\n", fname, buf[i], i);
close(fd);
@@ -126,8 +132,30 @@ checkfile(const char *fname)
{
chars++;
lines_len[line % 10]++;
+
+ if(buf[i] == ' ') continue;
+
+ if(isdigit(buf[i]))
+ numbers++;
+ else if(isalpha(buf[i]))
+ letters++;
+ else
+ puncts++;
}
}
+
+ if(puncts + numbers > letters)
+ {
+ printf("%s (block 0x%lX to 0x%lX): Got more punctuation(%d) + numbers(%d) than letters(%d)\n",
+ fname,
+ pos,
+ pos + nread,
+ puncts,
+ numbers,
+ letters);
+ }
+
+ pos += nread;
}
// TODO: Check for a "generated" string
@@ -214,7 +242,7 @@ checkdir(const char *dirname)
continue;
}
if(err < 0) return 1;
- if(err > 0) ret = 1+err;
+ if(err > 0) ret = 1 + err;
}
}
}