commit: 3863a47d7621e2dc7e49ca53a9617f48ae0e3df6
parent 1cb5050d6bbdaf57c4e46dfd1627b01811ce7830
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Thu, 7 Nov 2024 06:25:26 +0100
Add short name to identify the kind of blob
Diffstat:
M | main.ha | 280 | +++++++++++++++++++++++++++++++++++++++++++------------------------------------ |
1 file changed, 151 insertions(+), 129 deletions(-)
diff --git a/main.ha b/main.ha
@@ -17,68 +17,67 @@ let noop: bool = false;
let check: bool = false;
const beam: []u8 = ['F', 'O', 'R', '1']; // Erlang BEAM
-const magic: [_][]u8 = [
- [0x7F, 'E', 'L', 'F'], // ELF
- ['!', '<', 'a', 'r', 'c', 'h', '>', '\n'], // Unix ar(1)
- [0x55, 0xAA], // IBM PC BIOS ROM
- ['F', 'O', 'R', '1'], // Erlang BEAM
- [0xCA, 0xFE, 0xBA, 0xBE], // Java Class File & Mach-O Executable
- ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00], // Partial MSDOS stub header (ECMA-335 Common Language Infrastructure)
- [0x1B, 'L', 'u', 'a'], // Lua bytecode
- [0x00, 'a', 's', 'm'], // WebAssembly
- ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f'], // Apple Preferred Executable Format
- [0xD0, 0x0D, 0xFE, 0xED], // Device Tree Blob (OpenFirmware, u-boot, …)
+const magic: [_](str, []u8) = [
+ ("ELF", [0x7F, 'E', 'L', 'F']),
+ ("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']),
+ ("PC-BIOS", [0x55, 0xAA]),
+ ("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']),
+ ("Java .class / MachO exec", [0xCA, 0xFE, 0xBA, 0xBE]),
+ ("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]), // Partial MSDOS stub header (ECMA-335 Common Language Infrastructure)
+ ("Lua bytecode", [0x1B, 'L', 'u', 'a']),
+ ("Wasm", [0x00, 'a', 's', 'm']),
+ ("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']), // Apple Preferred Executable Format
+ ("DTB", [0xD0, 0x0D, 0xFE, 0xED]), // Device Tree Blob (OpenFirmware, u-boot, …)
// Python *.pyc bytecode magic numbers (defined in importlib/_bootstrap_external.py)
- [0x03, 0xF3, '\r', '\n'], // (62211i little-endian) Python 2.7
- [0x55, 0x0D, '\r', '\n'], // (3413i litte-endian) Python 3.8
- [0x61, 0x0D, '\r', '\n'], // (3425i litte-endian) Python 3.9
- [0x6F, 0x0D, '\r', '\n'], // (3439i litte-endian) Python 3.10
- [0xA7, 0x0D, '\r', '\n'], // (3495i litte-endian) Python 3.11
- [0xCB, 0x0D, '\r', '\n'], // (3531i litte-endian) Python 3.12
+ ("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian)
+ ("Python pyc 3.8", [0x55, 0x0D, '\r', '\n']), // (3413i little-endian)
+ ("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i little-endian)
+ ("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i little-endian)
+ ("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i little-endian)
+ ("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i little-endian)
// Python pickle object data; like Perl Storable, it is dangerous enough to cause code execution
- [0x80, 0x02], // Protocol 2 + start of frame
- [0x80, 0x03], // Protocol 3 + start of frame
- [0x80, 0x04, 0x95], // Protocol 4 + start of frame
- [0x80, 0x05, 0x95], // Protocol 5 + start of frame
-
- // MoarVM bytecode https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
- ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n'],
- // Parrot Bytecode https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
- [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n'],
- ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e'], // Perl Storable(v0.6)
- ['p', 's', 't', '0'], // Perl Storable(v0.7)
- [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z'], // Chez Scheme bytecode
- ['N', 'E', 'K', 'O'], // NekoVM Bytecode
- [';', 'E', 'L', 'C'], // Emacs lisp bytecode, if there is known false positives next 4 bytes is the version
-
- ['C', 'a', 'm', 'l', '1', '9', '9', '9'], // OCaml
-
- [0x78, 0x9c], // Ren'Py Archive v1
- ['R', 'P', 'A', '-', '2', '.', '0', ' '], // Ren'Py Archive v2
- ['R', 'P', 'A', '-', '3', '.', '0', ' '], // Ren'Py Archive v3
-
- [0xFA, 0xFA], // Squirrel bytecode
-
- ['C', 'P', 'C', 'H'],
+ ("Python Pickle v2", [0x80, 0x02]), // PROTO opcode + protocol 2 (no framing before protocol 4)
+ ("Python Pickle v3", [0x80, 0x03]), // PROTO opcode + protocol 3 (no framing before protocol 4)
+ ("Python Pickle v4", [0x80, 0x04, 0x95]), // Protocol 4 + start of frame
+ ("Python Pickle v5", [0x80, 0x05, 0x95]), // Protocol 5 + start of frame
+
+ // https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
+ ("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']),
+ // https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
+ ("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']),
+ ("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']),
+ ("Perl storable v0.7", ['p', 's', 't', '0']),
+ ("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']),
+ ("NekoVM bytecode", ['N', 'E', 'K', 'O']),
+ ("Emacs Lisp bytecode", [';', 'E', 'L', 'C']), // Emacs Lisp bytecode; if false positives are found, the next 4 bytes are the version
+
+ ("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']),
+
+ ("Ren'Py Archive v1", [0x78, 0x9c]),
+ ("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']),
+ ("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']),
+
+ ("Squirrel bytecode", [0xFA, 0xFA]),
+
+ ("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']),
// Clang Pre-Compiled-Header, followed by Info Block, see:
// - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST
// - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic
// Excluded from fixtures (200KB+), test with:
// echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h
- ['g', 'p', 'c', 'h'],
- // GCC Pre-Compiled-Header
+ ("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']),
// Excluded from fixtures (1.2MB+), test with:
// echo > empty.h && gcc empty.h
- ['G', 'R', 'S', 'T'], // GCC Rust Metadata (*.rox)
+ ("GCC Rust Metadata", ['G', 'R', 'S', 'T']), // GCC Rust Metadata (*.rox)
- [0x90, 0xab, 0xcd, 0xef], // Dart Kernel snapshot
+ ("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]),
// why are these 2 different, and is the C one still in use?
// no clue, but both are in the sdk repo.
- [0xdc, 0xdc, 0xf5, 0xf5], // Dart JIT snapshot, if done from the C code.
- [0xdc, 0xdc, 0xf6, 0xf6], // Dart JIT snapshot, if done from the Dart code.
+ ("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]), // Dart JIT snapshot, if done from the C code.
+ ("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]), // Dart JIT snapshot, if done from the Dart code.
];
const dos_magic: []u8 = ['M', 'Z'];
const pe_magic: []u8 = ['P', 'E', 0x00, 0x00];
@@ -89,7 +88,7 @@ const shebang: []u8 = ['#', '!'];
let found: bool = false;
-fn is_blob(filename: str) (bool | fs::error | io::error) = {
+fn id_blob(filename: str) (void | str | fs::error | io::error) = {
static let buffer: [4096]u8 = [0...];
const file = os::open(filename)?;
@@ -97,13 +96,13 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = {
if (io::read(file, buffer)? is io::EOF) {
// empty file
- return false;
+ return void;
};
for (let i = 0z; i < len(magic); i += 1) {
- assert(len(magic[i]) > 0);
- if (bytes::hasprefix(buffer, magic[i])) {
- return true;
+ assert(len(magic[i].1) > 0);
+ if (bytes::hasprefix(buffer, magic[i].1)) {
+ return magic[i].0;
};
};
@@ -112,7 +111,7 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = {
const pe_offset = endian::legetu32(buffer[60..64]);
if ((pe_offset <= 4096-4) && bytes::hasprefix(buffer[pe_offset..pe_offset+4], pe_magic)) {
- return true;
+ return "WinNT EXE";
};
};
@@ -130,8 +129,8 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = {
comment = true;
} else {
// First bytes after comments
- if(bytes::equal(zip, buffer[i..i+len(zip)])) return true;
- if(bytes::equal(beam, buffer[i..i+len(beam)])) return true;
+ if(bytes::equal(zip, buffer[i..i+len(zip)])) return "Erlang ZIP BEAM";
+ if(bytes::equal(beam, buffer[i..i+len(beam)])) return "Erlang #! BEAM";
// source code as script
break;
@@ -155,81 +154,106 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = {
const racket_start = version_end+2;
if(bytes::equal(racket, buffer[racket_start..racket_start+racket_len])) {
- return true;
+ return "Racket";
};
};
if (bytes::hasprefix(buffer, zip)) {
if(bytes::equal(jar, buffer[0x27..0x2B])) {
- return true;
+ return "Java JAR";
};
};
- return false;
+ return void;
};
-@test fn is_blob() void = {
- const tests = [
- (false, "test/fixtures/empty"),
- (false, "test/fixtures/empty.dts"),
- (false, "test/fixtures/hello-dart.dart"),
- (false, "test/fixtures/hello-ocaml.ml"),
- (false, "test/fixtures/hello-racket.rkt"),
- (false, "test/fixtures/hello.1"),
- (false, "test/fixtures/hello.c"),
- (false, "test/fixtures/hello.cs"),
- (false, "test/fixtures/hello.el"),
- (false, "test/fixtures/hello.erl"),
- (false, "test/fixtures/hello.erl.escript"),
- (false, "test/fixtures/hello.java"),
- (false, "test/fixtures/hello.lua"),
- (false, "test/fixtures/hello.neko"),
- (false, "test/fixtures/hello.nqp"),
- (false, "test/fixtures/hello.nut"),
- (false, "test/fixtures/hello.pir"),
- (false, "test/fixtures/hello.py"),
- (false, "test/fixtures/hello.wat"),
- (false, "test/fixtures/perl_storage.pm"),
- (true, "test/fixtures/compiled/hello-racket_rkt.zo"),
- (true, "test/fixtures/empty.dtb"),
- (true, "test/fixtures/hello"),
- (true, "test/fixtures/hello-dart.dill"),
- (true, "test/fixtures/hello-dart.jit"),
- (true, "test/fixtures/hello-ocaml.a"),
- (true, "test/fixtures/hello-ocaml.cma"),
- (true, "test/fixtures/hello-ocaml.cmi"),
- (true, "test/fixtures/hello-ocaml.cmo"),
- (true, "test/fixtures/hello-ocaml.cmx"),
- (true, "test/fixtures/hello-ocaml.cmxa"),
- (true, "test/fixtures/hello-ocaml.o"),
- (true, "test/fixtures/hello.a"),
- (true, "test/fixtures/hello.beam"),
- (true, "test/fixtures/hello.beam.escript"),
- (true, "test/fixtures/hello.class"),
- (true, "test/fixtures/hello.cnut"),
- (true, "test/fixtures/hello.elc"),
- (true, "test/fixtures/hello.exe"),
- (true, "test/fixtures/hello.jar"),
- (true, "test/fixtures/hello.luac53"),
- (true, "test/fixtures/hello.luac54"),
- (true, "test/fixtures/hello.n"),
- (true, "test/fixtures/hello.nqp.moarvm"),
- (true, "test/fixtures/hello.o"),
- (true, "test/fixtures/hello.pir.pbc"),
- (true, "test/fixtures/hello.wasm"),
- (true, "test/fixtures/monodx.dll"),
- (true, "test/fixtures/perl_storage.pst"),
- (true, "test/fixtures/pickle/hello.4.pickle"),
- (true, "test/fixtures/pickle/hello.5.pickle"),
- (true, "test/fixtures/qemu_vga.ndrv"),
- //(true, "test/fixtures/option.rom"),
+@test fn id_blob() void = {
+ const sources = [
+ "test/fixtures/empty",
+ "test/fixtures/empty.dts",
+ "test/fixtures/hello-dart.dart",
+ "test/fixtures/hello-ocaml.ml",
+ "test/fixtures/hello-racket.rkt",
+ "test/fixtures/hello.1",
+ "test/fixtures/hello.c",
+ "test/fixtures/hello.cs",
+ "test/fixtures/hello.el",
+ "test/fixtures/hello.erl",
+ "test/fixtures/hello.erl.escript",
+ "test/fixtures/hello.java",
+ "test/fixtures/hello.lua",
+ "test/fixtures/hello.neko",
+ "test/fixtures/hello.nqp",
+ "test/fixtures/hello.nut",
+ "test/fixtures/hello.pir",
+ "test/fixtures/hello.py",
+ "test/fixtures/hello.wat",
+ "test/fixtures/perl_storage.pm",
];
- for (let i = 0z; i < len(tests); i += 1) {
- const result = is_blob(tests[i].1)!;
- if (result != tests[i].0) {
- fmt::fatalf("deblob: is_blob({}) was incorrect, got {}, expected {}",
- tests[i].1, result, tests[i].0);
+ for (let i = 0z; i < len(sources); i += 1) {
+ match(id_blob(sources[i])!) {
+ case void =>
+ continue;
+ case let s: str =>
+ fmt::fatalf(
+ "deblob: id_blob({}) got wrongly detected as: {}",
+ sources[i], s
+ );
+ };
+ };
+
+ const blobs = [
+ ("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"),
+ ("DTB", "test/fixtures/empty.dtb"),
+ ("ELF", "test/fixtures/hello"),
+ ("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"),
+ ("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"),
+ ("Unix ar(1)", "test/fixtures/hello-ocaml.a"),
+ ("OCaml", "test/fixtures/hello-ocaml.cma"),
+ ("OCaml", "test/fixtures/hello-ocaml.cmi"),
+ ("OCaml", "test/fixtures/hello-ocaml.cmo"),
+ ("OCaml", "test/fixtures/hello-ocaml.cmx"),
+ ("OCaml", "test/fixtures/hello-ocaml.cmxa"),
+ ("ELF", "test/fixtures/hello-ocaml.o"),
+ ("Unix ar(1)", "test/fixtures/hello.a"),
+ ("Erlang FOR1 BEAM", "test/fixtures/hello.beam"),
+ ("Erlang #! BEAM", "test/fixtures/hello.beam.escript"),
+ ("Java .class / MachO exec", "test/fixtures/hello.class"),
+ ("Squirrel bytecode", "test/fixtures/hello.cnut"),
+ ("Emacs Lisp bytecode", "test/fixtures/hello.elc"),
+ ("WinNT EXE", "test/fixtures/hello.exe"),
+ ("Java JAR", "test/fixtures/hello.jar"),
+ ("Lua bytecode", "test/fixtures/hello.luac53"),
+ ("Lua bytecode", "test/fixtures/hello.luac54"),
+ ("NekoVM bytecode", "test/fixtures/hello.n"),
+ ("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"),
+ ("ELF", "test/fixtures/hello.o"),
+ ("Parrot bytecode", "test/fixtures/hello.pir.pbc"),
+ ("Wasm", "test/fixtures/hello.wasm"),
+ ("WinNT EXE", "test/fixtures/monodx.dll"),
+ ("Perl storable v0.7", "test/fixtures/perl_storage.pst"),
+ ("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"),
+ ("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"),
+ ("Apple PEF", "test/fixtures/qemu_vga.ndrv"),
+ //("", "test/fixtures/option.rom"),
+ ];
+
+ for (let i = 0z; i < len(blobs); i += 1) {
+ match(id_blob(blobs[i].1)!) {
+ case void =>
+ fmt::fatalf(
+ "deblob: id_blob({}) didn't got detected as: {}",
+ blobs[i].1, blobs[i].0
+ );
+ case let s: str =>
+ if(s != blobs[i].0)
+ {
+ fmt::fatalf(
+ "deblob: id_blob({}) got identified as \"{}\" instead of \"{}\"",
+ blobs[i].1, s, blobs[i].0
+ );
+ };
};
};
};
@@ -270,9 +294,11 @@ fn check_dir(dirname: str) (void | errors::invalid | io::error) = {
if (fs::isdir(ent.ftype)) {
check_dir(filename)?;
} else if(fs::isfile(ent.ftype)) {
- const is_blob = match (is_blob(filename)) {
- case let b: bool =>
- yield b;
+ const blob_id = match (id_blob(filename)) {
+ case void =>
+ continue;
+ case let s: str =>
+ yield s;
case let err: fs::error =>
fmt::errorfln("deblob: Error opening {}: {}",
filename, fs::strerror(err))!;
@@ -283,29 +309,25 @@ fn check_dir(dirname: str) (void | errors::invalid | io::error) = {
continue;
};
- if (!is_blob) {
- continue;
- };
-
if (is_excluded(filename)) {
- fmt::printfln("ignoring: {}", filename)!;
+ fmt::printfln("ignoring {}:\t{}", blob_id, filename)!;
continue;
};
found = true;
if (noop) {
- fmt::printfln("detected: {}", filename)!;
+ fmt::printfln("detected {}:\t{}", blob_id, filename)!;
continue;
};
- fmt::printfln("removing: {}", filename)!;
+ fmt::printfln("removing {}:\t{}", blob_id, filename)!;
match (os::remove(filename)) {
case void =>
continue;
case let e: fs::error =>
- fmt::errorfln("deblob: os::remove({}): {}",
+ fmt::errorfln("deblob: os::remove({}):\t{}",
filename, fs::strerror(e))!;
};
} else {