main.ha (14763B)
- // Copyright © 2019 deblob Authors <https://hacktivis.me/projects/deblob>
- // SPDX-License-Identifier: BSD-3-Clause
- use bytes;
- use encoding::json;
- use endian;
- use errors;
- use fmt;
- use fnmatch;
- use fs;
- use getopt;
- use io;
- use os;
- use path;
- use strings;
// Command-line state, filled in by main() while parsing options.

// -c: exit with status 2 when at least one non-excluded blob was found.
let check: bool = false;
// -n: only report detected blobs, never remove them.
let noop: bool = false;
// -j: also record every action through append_action().
let json: bool = false;
// -e: fnmatch patterns; files whose path matches one are reported as ignored
// instead of being removed (see is_excluded()).
let excludes: []str = [];

// Erlang BEAM signature, looked for after the comment header of an escript.
const beam: []u8 = ['F', 'O', 'R', '1'];
// Table of (format name, magic number) pairs.
//
// id_blob() walks this table from top to bottom and returns the name of the
// first entry whose magic number is a prefix of the file contents, so the
// order of entries matters whenever two signatures share a prefix.
const magic: [_](str, []u8) = [
	("ELF", [0x7F, 'E', 'L', 'F']),
	("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']),
	("PC-BIOS", [0x55, 0xAA]),
	("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']),
	("Java .class / Mach-O exec", [0xCA, 0xFE, 0xBA, 0xBE]),
	("Mach-O exec", [0xCF, 0xFA, 0xED, 0xFE]),

	// Partial MSDOS stub header (ECMA-335 Common Language Infrastructure)
	("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]),

	("Lua bytecode", [0x1B, 'L', 'u', 'a']),
	("Wasm", [0x00, 'a', 's', 'm']),

	// Apple Preferred Executable Format
	("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']),

	// Device Tree Blob (OpenFirmware, u-boot, …)
	("DTB", [0xD0, 0x0D, 0xFE, 0xED]),

	// Python *.pyc bytecode magic numbers
	// (defined in importlib/_bootstrap_external.py)
	("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian)
	("Python pyc 3.8", [0x55, 0x0D, '\r', '\n']), // (3413i little-endian)
	("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i little-endian)
	("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i little-endian)
	("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i little-endian)
	("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i little-endian)

	// Python pickle object data; similarly to Perl Storable it is
	// dangerous enough to cause code execution.
	("Python Pickle v2", [0x80, 0x02]), // Protocol 2 (no frame byte matched)
	("Python Pickle v3", [0x80, 0x03]), // Protocol 3 (no frame byte matched)
	("Python Pickle v4", [0x80, 0x04, 0x95]), // Protocol 4 + start of frame
	("Python Pickle v5", [0x80, 0x05, 0x95]), // Protocol 5 + start of frame

	// https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
	("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']),

	// https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
	("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']),

	("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']),
	("Perl storable v0.7", ['p', 's', 't', '0']),
	("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']),
	("NekoVM bytecode", ['N', 'E', 'K', 'O']),

	// Emacs Lisp bytecode; if false positives show up, the next 4 bytes
	// are the version and could be matched as well.
	("Emacs Lisp bytecode", [';', 'E', 'L', 'C']),

	("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']),
	("Ren'Py Archive v1", [0x78, 0x9c]),
	("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']),
	("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']),
	("Squirrel bytecode", [0xFA, 0xFA]),

	// Clang Pre-Compiled-Header, followed by Info Block, see:
	// - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST
	// - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic
	// Excluded from fixtures (200KB+), test with:
	//   echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h
	("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']),

	// Excluded from fixtures (1.2MB+), test with:
	//   echo > empty.h && gcc empty.h
	("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']),

	// GCC Rust Metadata (*.rox)
	("GCC Rust Metadata", ['G', 'R', 'S', 'T']),

	("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]),

	// Why are these 2 different, and is the C one still in use?
	// No clue, but both are in the sdk repo.
	// Dart JIT snapshot, if done from the C code.
	("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]),
	// Dart JIT snapshot, if done from the Dart code.
	("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]),
];
// Signatures used by the multi-step checks in id_blob(), which a plain prefix
// match against the magic table cannot express.

// First two bytes of a file carrying an MS-DOS header.
const dos_magic: []u8 = ['M', 'Z'];
// Signature expected at the offset stored in bytes 60..64 of such a file.
const pe_magic: []u8 = ['P', 'E', 0x00, 0x00];
// Virtual machine name expected inside a "#~" (Racket bytecode) header.
const racket: []u8 = ['r', 'a', 'c', 'k', 'e', 't'];
// PKZIP signature.
const zip: []u8 = ['P', 'K', 0x03, 0x04];
// Bytes compared against offsets 0x27..0x2B of a PKZIP file to call it a JAR.
const jar: []u8 = [0xFE, 0xCA, 0, 0];
// Start of an interpreter line; triggers the escript scan.
const shebang: []u8 = ['#', '!'];

// Set by check_dir() once a non-excluded blob has been detected.
let found: bool = false;
// Destination of the JSON report; main() replaces this placeholder with the
// file named by -j before any action is recorded.
let json_out: io::handle = 0;
// Identifies the blob format of the file at filename by looking at (at most)
// its first 4096 bytes.
//
// Returns the human-readable name of the detected format, or void when the
// file is empty or matches none of the known signatures. Errors from opening
// or reading the file are returned to the caller.
fn id_blob(filename: str) (void | str | fs::error | io::error) = {
	// Shared between calls; the part not overwritten by the current read
	// is wiped below.
	static let buffer: [4096]u8 = [0...];

	const file = os::open(filename)?;
	defer io::close(file)!;

	const nread = match (io::read(file, buffer)?) {
	case io::EOF =>
		// empty file
		return void;
	case let n: size =>
		yield n;
	};
	// Without this, bytes left over from a previous, longer file would sit
	// right behind the current contents and could complete a signature
	// that the current file only starts.
	bytes::zero(buffer[nread..]);

	for (let i = 0z; i < len(magic); i += 1) {
		assert(len(magic[i].1) > 0);
		if (bytes::hasprefix(buffer, magic[i].1)) {
			return magic[i].0;
		};
	};

	// Special check to detect *all* Microsoft Portable Executable files
	if (bytes::hasprefix(buffer, dos_magic)) {
		const pe_offset = endian::legetu32(buffer[60..64]): size;
		if (pe_offset <= len(buffer) - len(pe_magic)
				&& bytes::hasprefix(buffer[pe_offset..pe_offset + len(pe_magic)], pe_magic)) {
			return "WinNT EXE";
		};
	};

	// detect binary escripts (PKZIP archive and BEAM supported)
	if (bytes::hasprefix(buffer, shebang)) {
		// The #! line itself counts as the first comment line.
		let comment = true;
		// Everything from nread onwards is zero padding, no need to scan it.
		for (let i = 0z; i < nread; i += 1) {
			if (comment) {
				if (buffer[i] == '\n') comment = false;
				continue;
			};

			if (buffer[i] == '%') {
				comment = true;
			} else {
				// First bytes after comments.
				// hasprefix() on the open-ended tail simply fails
				// when fewer bytes remain than the signature
				// needs; slicing buffer[i..i+4] would abort when
				// i is within the last 3 bytes of the buffer.
				if (bytes::hasprefix(buffer[i..], zip)) return "Erlang ZIP BEAM";
				if (bytes::hasprefix(buffer[i..], beam)) return "Erlang #! BEAM";

				// source code as script
				break;
			};
		};
	};

	// Special check to detect racket bytecode
	if (bytes::hasprefix(buffer, ['#', '~'])) {
		// From src/expander/compile/write-linklet.rkt in racket:
		// - #~
		// - length-prefixed version string (ie. "\x038.5")
		// - length-prefixed virtual machine string (ie. "\x06racket")
		// - 'D' / 'B'
		// Here it verifies that the virtual machine string is racket, assuming none other is supported in the wild.
		// Racket itself only matches against '#~' which is small & only printable-ASCII, so too prone to false positives
		//
		// Both lengths are single bytes taken from the file. They are
		// widened to size before any arithmetic: computed as u8 the
		// sums could wrap around and yield an inverted slice. As size
		// the furthest index is 2+255+2+255, well inside the buffer.
		const version_len = buffer[2]: size;
		const version_end = 2 + version_len;
		const racket_len = buffer[version_end + 1]: size;
		const racket_start = version_end + 2;
		if (bytes::equal(racket, buffer[racket_start..racket_start + racket_len])) {
			return "Racket";
		};
	};

	if (bytes::hasprefix(buffer, zip)) {
		if (bytes::equal(jar, buffer[0x27..0x2B])) {
			return "Java JAR";
		};
	};

	return void;
};
// Runs id_blob() over the fixtures: plain sources must not be identified as
// anything, and every blob must be identified under its expected name.
@test fn id_blob() void = {
	// Files that must NOT be reported as blobs.
	const sources = [
		"test/fixtures/empty",
		"test/fixtures/empty.dts",
		"test/fixtures/hello-dart.dart",
		"test/fixtures/hello-ocaml.ml",
		"test/fixtures/hello-racket.rkt",
		"test/fixtures/hello.1",
		"test/fixtures/hello.c",
		"test/fixtures/hello.cs",
		"test/fixtures/hello.el",
		"test/fixtures/hello.erl",
		"test/fixtures/hello.erl.escript",
		"test/fixtures/hello.java",
		"test/fixtures/hello.lua",
		"test/fixtures/hello.neko",
		"test/fixtures/hello.nqp",
		"test/fixtures/hello.nut",
		"test/fixtures/hello.pir",
		"test/fixtures/hello.py",
		"test/fixtures/hello.wat",
		"test/fixtures/perl_storage.pm",
	];

	for (let src .. sources) {
		match (id_blob(src)!) {
		case let got: str =>
			fmt::fatalf(
				"deblob: error: id_blob({}) got wrongly detected as: {}",
				src, got
			);
		case void =>
			void;
		};
	};

	// (expected format name, fixture path)
	const blobs = [
		("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"),
		("DTB", "test/fixtures/empty.dtb"),
		("ELF", "test/fixtures/hello"),
		("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"),
		("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"),
		("Unix ar(1)", "test/fixtures/hello-ocaml.a"),
		("OCaml", "test/fixtures/hello-ocaml.cma"),
		("OCaml", "test/fixtures/hello-ocaml.cmi"),
		("OCaml", "test/fixtures/hello-ocaml.cmo"),
		("OCaml", "test/fixtures/hello-ocaml.cmx"),
		("OCaml", "test/fixtures/hello-ocaml.cmxa"),
		("ELF", "test/fixtures/hello-ocaml.o"),
		("Unix ar(1)", "test/fixtures/hello.a"),
		("Erlang FOR1 BEAM", "test/fixtures/hello.beam"),
		("Erlang #! BEAM", "test/fixtures/hello.beam.escript"),
		("Java .class / Mach-O exec", "test/fixtures/hello.class"),
		("Squirrel bytecode", "test/fixtures/hello.cnut"),
		("Emacs Lisp bytecode", "test/fixtures/hello.elc"),
		("WinNT EXE", "test/fixtures/hello.exe"),
		("Java JAR", "test/fixtures/hello.jar"),
		("Lua bytecode", "test/fixtures/hello.luac53"),
		("Lua bytecode", "test/fixtures/hello.luac54"),
		("NekoVM bytecode", "test/fixtures/hello.n"),
		("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"),
		("ELF", "test/fixtures/hello.o"),
		("Parrot bytecode", "test/fixtures/hello.pir.pbc"),
		("Wasm", "test/fixtures/hello.wasm"),
		("WinNT EXE", "test/fixtures/monodx.dll"),
		("Perl storable v0.7", "test/fixtures/perl_storage.pst"),
		("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"),
		("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"),
		("Apple PEF", "test/fixtures/qemu_vga.ndrv"),
		//("", "test/fixtures/option.rom"),
		("Mach-O exec", "test/fixtures/macos-arm64.o"),
	];

	for (let blob .. blobs) {
		const expected = blob.0;
		const fixture = blob.1;

		match (id_blob(fixture)!) {
		case let got: str =>
			if (got != expected) {
				fmt::fatalf(
					"deblob: error: id_blob({}) got identified as \"{}\" instead of \"{}\"",
					fixture, got, expected
				);
			};
		case void =>
			fmt::fatalf(
				"deblob: error: id_blob({}) didn't got detected as: {}",
				fixture, expected
			);
		};
	};
};
// Reports whether filename matches at least one of the patterns collected in
// excludes. Always false while no pattern has been registered.
fn is_excluded(filename: str) bool = {
	for (let pattern .. excludes) {
		if (fnmatch::fnmatch(pattern, filename, fnmatch::flag::NONE)) {
			return true;
		};
	};

	return false;
};
// Writes one entry of the JSON report to json_out: an object with the keys
// "action", "path" and "format". Does nothing unless JSON output is enabled.
//
// Entries are emitted as elements of an array whose brackets are written by
// the caller: each one goes on its own tab-indented line, and every entry but
// the first is preceded by a comma.
fn append_action(action: str, filename: str, format: str) void = {
	if (!json) return;

	let obj = json::object { ... };
	// Registered first so that it runs last: defers unwind in reverse
	// order, so by then the strings below have been taken back out and
	// only the object's own storage is left to release. Without this that
	// storage was leaked on every call.
	defer json::finish(obj);

	// The values belong to the caller: each one is handed to the object
	// only for serialisation and removed again before the object is
	// finished.
	json::put(&obj, "action", action);
	defer json::take(&obj, "action");

	json::put(&obj, "path", filename);
	defer json::take(&obj, "path");

	json::put(&obj, "format", format);
	defer json::take(&obj, "format");

	let obj_s = json::dumpstr(obj);
	defer free(obj_s);

	// Remembers across calls whether a separator is needed.
	static let first_obj: bool = true;
	const separator = if (first_obj) "" else ",";
	first_obj = false;

	fmt::fprintf(json_out, "{}\n\t{}", separator, obj_s)!;
};
// Walks dirname recursively and handles every regular file that id_blob()
// identifies as a blob:
// - a file matching an exclusion pattern is reported as "ignoring";
// - otherwise found is set and the file is reported as "detected" (no-op
//   mode) or reported as "removing" and then removed.
// Each report goes to standard output and, through append_action(), to the
// JSON output. Entries that are neither directories nor regular files are
// skipped. Files that cannot be opened, read or removed are logged on
// standard error and do not stop the walk.
//
// Returns errors::invalid when a directory cannot be opened for iteration. An
// error while fetching a later entry is only logged and ends the walk of that
// directory. An io::error is returned when writing that log line fails.
// Failures in subdirectories propagate to the caller.
fn check_dir(dirname: str) (void | errors::invalid | io::error) = {
	const iter = match (os::iter(dirname)) {
	case let err: fs::error =>
		fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(err))!;
		return errors::invalid;
	case let it: *fs::iterator =>
		yield it;
	};
	defer fs::finish(iter);

	for (true) {
		const entry: fs::dirent = match (fs::next(iter)) {
		case done =>
			break;
		case let e: fs::error =>
			fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(e))?;
			break;
		case let d: fs::dirent =>
			yield d;
		};

		const full_path = path::init(dirname, entry.name)!;
		const filename = path::string(&full_path);

		if (fs::isdir(entry.ftype)) {
			check_dir(filename)?;
			continue;
		};

		if (!fs::isfile(entry.ftype)) {
			// ignore non-(dir/regular-file) like symlinks, blocks, fifo, …
			continue;
		};

		const blob_id = match (id_blob(filename)) {
		case let name: str =>
			yield name;
		case void =>
			// not a blob
			continue;
		case let err: fs::error =>
			fmt::errorfln("deblob: error: Failed opening {}: {}",
				filename, fs::strerror(err))!;
			continue;
		case let err: io::error =>
			fmt::errorfln("deblob: error: Failed reading {}: {}",
				filename, io::strerror(err))!;
			continue;
		};

		if (is_excluded(filename)) {
			append_action("ignoring", filename, blob_id);
			fmt::printfln("ignoring {}:\t{}", blob_id, filename)!;
			continue;
		};

		// Excluded blobs above deliberately do not count as found.
		found = true;

		if (noop) {
			append_action("detected", filename, blob_id);
			fmt::printfln("detected {}:\t{}", blob_id, filename)!;
			continue;
		};

		append_action("removing", filename, blob_id);
		fmt::printfln("removing {}:\t{}", blob_id, filename)!;

		match (os::remove(filename)) {
		case let e: fs::error =>
			fmt::errorfln("deblob: error: Failed removing file '{}':\t{}",
				filename, fs::strerror(e))!;
		case void =>
			void;
		};
	};
};
// Runs check_dir() on a dedicated fixture directory and verifies, by counting
// directory entries before and after, that it went from 61 entries to 30.
@test fn check_dir() void = {
	const dirname = "test/check_dir-fixtures";

	const before = match (os::readdir(dirname)) {
	case let e: fs::error =>
		fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
	case let entries: []fs::dirent =>
		yield entries;
	};
	assert(len(before) == 61);

	const outcome = check_dir(dirname);
	assert(outcome is void);

	const after = match (os::readdir(dirname)) {
	case let e: fs::error =>
		fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
	case let entries: []fs::dirent =>
		yield entries;
	};
	assert(len(after) == 30);
};
// Entry point: parses the command line, optionally opens the JSON output file
// and changes the working directory, then scans "." with check_dir().
//
// Exit status:
// - 0: the scan completed;
// - 1: the scan failed (a directory could not be walked, or an I/O error);
// - 2: the scan completed, -c was given and a non-excluded blob was found.
export fn main() void = {
	const cmd = getopt::parse(os::args,
		"Remove binary executable files",
		('c', "Return error if any non-excluded blobs were found"),
		('e', "NAME", "Exclude filename from removal (defaults to none)"),
		('d', "PATH", "Set working directory (default to current dir)"),
		('j', "PATH", "JSON output file"),
		('n', "No actual removal, only scan and log"),
	);
	defer getopt::finish(&cmd);
	defer free(excludes);

	let opt_d = "";
	let json_out_path = "";

	for (let i = 0z; i < len(cmd.opts); i += 1) {
		const opt = cmd.opts[i];
		switch (opt.0) {
		case 'c' =>
			check = true;
		case 'e' =>
			append(excludes, opt.1);
		case 'd' =>
			opt_d = opt.1;
		case 'n' =>
			noop = true;
		case 'j' =>
			json = true;
			json_out_path = opt.1;
		case =>
			fmt::fatalf("deblob: error: Unhandled option -{}", opt.0);
		};
	};

	// Gate on the json flag rather than on the path being non-empty: with
	// `-j ""` the flag is set, so append_action() will write to json_out,
	// which must therefore never be left at its placeholder value. An
	// empty path now fails right here with the creation error instead.
	if (json) {
		json_out = match (os::create(json_out_path, fs::mode::USER_RW | fs::mode::GROUP_R | fs::mode::OTHER_R)) {
		case let f: io::file =>
			yield f;
		case let e: fs::error =>
			fmt::fatalf("deblob: error: Failed creating/opening file '{}' for JSON output: {}", json_out_path, fs::strerror(e));
		};

		// Opening bracket of the array filled by append_action().
		fmt::fprint(json_out, "[")!;
	};

	if (opt_d != "") {
		match (os::chdir(opt_d)) {
		case let e: fs::error =>
			fmt::fatalf("deblob: error: Failed changing current directory to '{}': {}", opt_d, fs::strerror(e));
		case void =>
			void;
		};
	};

	fmt::println(":: Checking for blobs")!;
	const ret = check_dir(".");
	fmt::println(":: Done checking for blobs")!;

	// Terminate and close the report before exiting, whatever the outcome
	// of the scan was.
	if (json) {
		fmt::fprint(json_out, "\n]")!;

		match (io::close(json_out)) {
		case void =>
			void;
		case let e: io::error =>
			fmt::fatalf("deblob: error: Failed closing JSON output file '{}': {}", json_out_path, io::strerror(e));
		};
	};

	match (ret) {
	case void =>
		if (check && found) os::exit(2);
		os::exit(0);
	case errors::invalid =>
		os::exit(1);
	case let e: io::error =>
		fmt::errorfln("deblob: error: I/O error while traversing directories: {}", io::strerror(e))!;
		os::exit(1);
	};
};