logo

deblob

remove binary executables from a directory
git clone https://hacktivis.me/git/deblob.git
commit: 3863a47d7621e2dc7e49ca53a9617f48ae0e3df6
parent 1cb5050d6bbdaf57c4e46dfd1627b01811ce7830
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Thu,  7 Nov 2024 06:25:26 +0100

Add short name to identify the kind of blob

Diffstat:

M main.ha | 280 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
1 file changed, 151 insertions(+), 129 deletions(-)

diff --git a/main.ha b/main.ha @@ -17,68 +17,67 @@ let noop: bool = false; let check: bool = false; const beam: []u8 = ['F', 'O', 'R', '1']; // Erlang BEAM -const magic: [_][]u8 = [ - [0x7F, 'E', 'L', 'F'], // ELF - ['!', '<', 'a', 'r', 'c', 'h', '>', '\n'], // Unix ar(1) - [0x55, 0xAA], // IBM PC BIOS ROM - ['F', 'O', 'R', '1'], // Erlang BEAM - [0xCA, 0xFE, 0xBA, 0xBE], // Java Class File & Mach-O Executable - ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00], // Partial MSDOS stub header (ECMA-335 Common Language Infrastructure) - [0x1B, 'L', 'u', 'a'], // Lua bytecode - [0x00, 'a', 's', 'm'], // WebAssembly - ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f'], // Apple Preferred Executable Format - [0xD0, 0x0D, 0xFE, 0xED], // Device Tree Blob (OpenFirmware, u-boot, …) +const magic: [_](str, []u8) = [ + ("ELF", [0x7F, 'E', 'L', 'F']), + ("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']), + ("PC-BIOS", [0x55, 0xAA]), + ("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']), + ("Java .class / MachO exec", [0xCA, 0xFE, 0xBA, 0xBE]), + ("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]), // Partial MSDOS stub header (ECMA-335 Common Language Infrastructure) + ("Lua bytecode", [0x1B, 'L', 'u', 'a']), + ("Wasm", [0x00, 'a', 's', 'm']), + ("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']), // Apple Preferred Executable Format + ("DTB", [0xD0, 0x0D, 0xFE, 0xED]), // Device Tree Blob (OpenFirmware, u-boot, …) // Python *.pyc bytecode magic numbers (defined in importlib/_bootstrap_external.py) - [0x03, 0xF3, '\r', '\n'], // (62211i little-endian) Python 2.7 - [0x55, 0x0D, '\r', '\n'], // (3413i litte-endian) Python 3.8 - [0x61, 0x0D, '\r', '\n'], // (3425i litte-endian) Python 3.9 - [0x6F, 0x0D, '\r', '\n'], // (3439i litte-endian) Python 3.10 - [0xA7, 0x0D, '\r', '\n'], // (3495i litte-endian) Python 3.11 - [0xCB, 0x0D, '\r', '\n'], // (3531i litte-endian) Python 3.12 + ("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian) + ("Python pyc 3.8", [0x55, 
0x0D, '\r', '\n']), // (3413i litte-endian) + ("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i litte-endian) + ("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i litte-endian) + ("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i litte-endian) + ("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i litte-endian) // Python pickle object data, similarly to Perl Storage it's dangerous enough to cause code execution - [0x80, 0x02], // Protocol 2 + start of frame - [0x80, 0x03], // Protocol 3 + start of frame - [0x80, 0x04, 0x95], // Protocol 4 + start of frame - [0x80, 0x05, 0x95], // Protocol 5 + start of frame - - // MoarVM bytecode https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown - ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n'], - // Parrot Bytecode https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod - [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n'], - ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e'], // Perl Storable(v0.6) - ['p', 's', 't', '0'], // Perl Storable(v0.7) - [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z'], // Chez Scheme bytecode - ['N', 'E', 'K', 'O'], // NekoVM Bytecode - [';', 'E', 'L', 'C'], // Emacs lisp bytecode, if there is known false positives next 4 bytes is the version - - ['C', 'a', 'm', 'l', '1', '9', '9', '9'], // OCaml - - [0x78, 0x9c], // Ren'Py Archive v1 - ['R', 'P', 'A', '-', '2', '.', '0', ' '], // Ren'Py Archive v2 - ['R', 'P', 'A', '-', '3', '.', '0', ' '], // Ren'Py Archive v3 - - [0xFA, 0xFA], // Squirrel bytecode - - ['C', 'P', 'C', 'H'], + ("Python Pickle v2", [0x80, 0x02]), // Protocol 2 + start of frame + ("Python Pickle v3", [0x80, 0x03]), // Protocol 3 + start of frame + ("Python Pickle v4", [0x80, 0x04, 0x95]), // Protocol 4 + start of frame + ("Python Pickle v5", [0x80, 0x05, 0x95]), // Protocol 5 + start of frame + + // https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown + ("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']), + // 
https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod + ("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']), + ("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']), + ("Perl storable v0.7", ['p', 's', 't', '0']), + ("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']), + ("NekoVM bytecode", ['N', 'E', 'K', 'O']), + ("Emacs Lisp bytecode", [';', 'E', 'L', 'C']), // Emacs lisp bytecode, if there is known false positives next 4 bytes is the version + + ("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']), + + ("Ren'Py Archive v1", [0x78, 0x9c]), + ("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']), + ("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']), + + ("Squirrel bytecode", [0xFA, 0xFA]), + + ("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']), // Clang Pre-Compiled-Header, followed by Info Block, see: // - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST // - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic // Excluded from fixtures (200KB+), test with: // echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h - ['g', 'p', 'c', 'h'], - // GCC Pre-Compiled-Header + ("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']), // Excluded from fixtures (1.2MB+), test with: // echo > empty.h && gcc empty.h - ['G', 'R', 'S', 'T'], // GCC Rust Metadata (*.rox) + ("GCC Rust Metadata", ['G', 'R', 'S', 'T']), // GCC Rust Metadata (*.rox) - [0x90, 0xab, 0xcd, 0xef], // Dart Kernel snapshot + ("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]), // why are these 2 different, and is the C one still in use? // no clue, but both are in the sdk repo. - [0xdc, 0xdc, 0xf5, 0xf5], // Dart JIT snapshot, if done from the C code. - [0xdc, 0xdc, 0xf6, 0xf6], // Dart JIT snapshot, if done from the Dart code. + ("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]), // Dart JIT snapshot, if done from the C code. 
+ ("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]), // Dart JIT snapshot, if done from the Dart code. ]; const dos_magic: []u8 = ['M', 'Z']; const pe_magic: []u8 = ['P', 'E', 0x00, 0x00]; @@ -89,7 +88,7 @@ const shebang: []u8 = ['#', '!']; let found: bool = false; -fn is_blob(filename: str) (bool | fs::error | io::error) = { +fn id_blob(filename: str) (void | str | fs::error | io::error) = { static let buffer: [4096]u8 = [0...]; const file = os::open(filename)?; @@ -97,13 +96,13 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = { if (io::read(file, buffer)? is io::EOF) { // empty file - return false; + return void; }; for (let i = 0z; i < len(magic); i += 1) { - assert(len(magic[i]) > 0); - if (bytes::hasprefix(buffer, magic[i])) { - return true; + assert(len(magic[i].1) > 0); + if (bytes::hasprefix(buffer, magic[i].1)) { + return magic[i].0; }; }; @@ -112,7 +111,7 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = { const pe_offset = endian::legetu32(buffer[60..64]); if ((pe_offset <= 4096-4) && bytes::hasprefix(buffer[pe_offset..pe_offset+4], pe_magic)) { - return true; + return "WinNT EXE"; }; }; @@ -130,8 +129,8 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = { comment = true; } else { // First bytes after comments - if(bytes::equal(zip, buffer[i..i+len(zip)])) return true; - if(bytes::equal(beam, buffer[i..i+len(beam)])) return true; + if(bytes::equal(zip, buffer[i..i+len(zip)])) return "Erlang ZIP BEAM"; + if(bytes::equal(beam, buffer[i..i+len(beam)])) return "Erlang #! 
BEAM"; // source code as script break; @@ -155,81 +154,106 @@ fn is_blob(filename: str) (bool | fs::error | io::error) = { const racket_start = version_end+2; if(bytes::equal(racket, buffer[racket_start..racket_start+racket_len])) { - return true; + return "Racket"; }; }; if (bytes::hasprefix(buffer, zip)) { if(bytes::equal(jar, buffer[0x27..0x2B])) { - return true; + return "Java JAR"; }; }; - return false; + return void; }; -@test fn is_blob() void = { - const tests = [ - (false, "test/fixtures/empty"), - (false, "test/fixtures/empty.dts"), - (false, "test/fixtures/hello-dart.dart"), - (false, "test/fixtures/hello-ocaml.ml"), - (false, "test/fixtures/hello-racket.rkt"), - (false, "test/fixtures/hello.1"), - (false, "test/fixtures/hello.c"), - (false, "test/fixtures/hello.cs"), - (false, "test/fixtures/hello.el"), - (false, "test/fixtures/hello.erl"), - (false, "test/fixtures/hello.erl.escript"), - (false, "test/fixtures/hello.java"), - (false, "test/fixtures/hello.lua"), - (false, "test/fixtures/hello.neko"), - (false, "test/fixtures/hello.nqp"), - (false, "test/fixtures/hello.nut"), - (false, "test/fixtures/hello.pir"), - (false, "test/fixtures/hello.py"), - (false, "test/fixtures/hello.wat"), - (false, "test/fixtures/perl_storage.pm"), - (true, "test/fixtures/compiled/hello-racket_rkt.zo"), - (true, "test/fixtures/empty.dtb"), - (true, "test/fixtures/hello"), - (true, "test/fixtures/hello-dart.dill"), - (true, "test/fixtures/hello-dart.jit"), - (true, "test/fixtures/hello-ocaml.a"), - (true, "test/fixtures/hello-ocaml.cma"), - (true, "test/fixtures/hello-ocaml.cmi"), - (true, "test/fixtures/hello-ocaml.cmo"), - (true, "test/fixtures/hello-ocaml.cmx"), - (true, "test/fixtures/hello-ocaml.cmxa"), - (true, "test/fixtures/hello-ocaml.o"), - (true, "test/fixtures/hello.a"), - (true, "test/fixtures/hello.beam"), - (true, "test/fixtures/hello.beam.escript"), - (true, "test/fixtures/hello.class"), - (true, "test/fixtures/hello.cnut"), - (true, 
"test/fixtures/hello.elc"), - (true, "test/fixtures/hello.exe"), - (true, "test/fixtures/hello.jar"), - (true, "test/fixtures/hello.luac53"), - (true, "test/fixtures/hello.luac54"), - (true, "test/fixtures/hello.n"), - (true, "test/fixtures/hello.nqp.moarvm"), - (true, "test/fixtures/hello.o"), - (true, "test/fixtures/hello.pir.pbc"), - (true, "test/fixtures/hello.wasm"), - (true, "test/fixtures/monodx.dll"), - (true, "test/fixtures/perl_storage.pst"), - (true, "test/fixtures/pickle/hello.4.pickle"), - (true, "test/fixtures/pickle/hello.5.pickle"), - (true, "test/fixtures/qemu_vga.ndrv"), - //(true, "test/fixtures/option.rom"), +@test fn id_blob() void = { + const sources = [ + "test/fixtures/empty", + "test/fixtures/empty.dts", + "test/fixtures/hello-dart.dart", + "test/fixtures/hello-ocaml.ml", + "test/fixtures/hello-racket.rkt", + "test/fixtures/hello.1", + "test/fixtures/hello.c", + "test/fixtures/hello.cs", + "test/fixtures/hello.el", + "test/fixtures/hello.erl", + "test/fixtures/hello.erl.escript", + "test/fixtures/hello.java", + "test/fixtures/hello.lua", + "test/fixtures/hello.neko", + "test/fixtures/hello.nqp", + "test/fixtures/hello.nut", + "test/fixtures/hello.pir", + "test/fixtures/hello.py", + "test/fixtures/hello.wat", + "test/fixtures/perl_storage.pm", ]; - for (let i = 0z; i < len(tests); i += 1) { - const result = is_blob(tests[i].1)!; - if (result != tests[i].0) { - fmt::fatalf("deblob: is_blob({}) was incorrect, got {}, expected {}", - tests[i].1, result, tests[i].0); + for (let i = 0z; i < len(sources); i += 1) { + match(id_blob(sources[i])!) 
{ + case void => + continue; + case let s: str => + fmt::fatalf( + "deblob: id_blob({}) got wrongly detected as: {}", + sources[i], s + ); + }; + }; + + const blobs = [ + ("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"), + ("DTB", "test/fixtures/empty.dtb"), + ("ELF", "test/fixtures/hello"), + ("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"), + ("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"), + ("Unix ar(1)", "test/fixtures/hello-ocaml.a"), + ("OCaml", "test/fixtures/hello-ocaml.cma"), + ("OCaml", "test/fixtures/hello-ocaml.cmi"), + ("OCaml", "test/fixtures/hello-ocaml.cmo"), + ("OCaml", "test/fixtures/hello-ocaml.cmx"), + ("OCaml", "test/fixtures/hello-ocaml.cmxa"), + ("ELF", "test/fixtures/hello-ocaml.o"), + ("Unix ar(1)", "test/fixtures/hello.a"), + ("Erlang FOR1 BEAM", "test/fixtures/hello.beam"), + ("Erlang #! BEAM", "test/fixtures/hello.beam.escript"), + ("Java .class / MachO exec", "test/fixtures/hello.class"), + ("Squirrel bytecode", "test/fixtures/hello.cnut"), + ("Emacs Lisp bytecode", "test/fixtures/hello.elc"), + ("WinNT EXE", "test/fixtures/hello.exe"), + ("Java JAR", "test/fixtures/hello.jar"), + ("Lua bytecode", "test/fixtures/hello.luac53"), + ("Lua bytecode", "test/fixtures/hello.luac54"), + ("NekoVM bytecode", "test/fixtures/hello.n"), + ("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"), + ("ELF", "test/fixtures/hello.o"), + ("Parrot bytecode", "test/fixtures/hello.pir.pbc"), + ("Wasm", "test/fixtures/hello.wasm"), + ("WinNT EXE", "test/fixtures/monodx.dll"), + ("Perl storable v0.7", "test/fixtures/perl_storage.pst"), + ("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"), + ("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"), + ("Apple PEF", "test/fixtures/qemu_vga.ndrv"), + //("", "test/fixtures/option.rom"), + ]; + + for (let i = 0z; i < len(blobs); i += 1) { + match(id_blob(blobs[i].1)!) 
{ + case void => + fmt::fatalf( + "deblob: id_blob({}) didn't got detected as: {}", + blobs[i].1, blobs[i].0 + ); + case let s: str => + if(s != blobs[i].0) + { + fmt::fatalf( + "deblob: id_blob({}) got identified as \"{}\" instead of \"{}\"", + blobs[i].1, s, blobs[i].0 + ); + }; }; }; }; @@ -270,9 +294,11 @@ fn check_dir(dirname: str) (void | errors::invalid | io::error) = { if (fs::isdir(ent.ftype)) { check_dir(filename)?; } else if(fs::isfile(ent.ftype)) { - const is_blob = match (is_blob(filename)) { - case let b: bool => - yield b; + const blob_id = match (id_blob(filename)) { + case void => + continue; + case let s: str => + yield s; case let err: fs::error => fmt::errorfln("deblob: Error opening {}: {}", filename, fs::strerror(err))!; @@ -283,29 +309,25 @@ fn check_dir(dirname: str) (void | errors::invalid | io::error) = { continue; }; - if (!is_blob) { - continue; - }; - if (is_excluded(filename)) { - fmt::printfln("ignoring: {}", filename)!; + fmt::printfln("ignoring {}:\t{}", blob_id, filename)!; continue; }; found = true; if (noop) { - fmt::printfln("detected: {}", filename)!; + fmt::printfln("detected {}:\t{}", blob_id, filename)!; continue; }; - fmt::printfln("removing: {}", filename)!; + fmt::printfln("removing {}:\t{}", blob_id, filename)!; match (os::remove(filename)) { case void => continue; case let e: fs::error => - fmt::errorfln("deblob: os::remove({}): {}", + fmt::errorfln("deblob: os::remove({}):\t{}", filename, fs::strerror(e))!; }; } else {