main.ha (16165B)
- // Copyright © 2019 deblob Authors <https://hacktivis.me/projects/deblob>
 - // SPDX-License-Identifier: BSD-3-Clause
 - use bytes;
 - use encoding::json;
 - use endian;
 - use errors;
 - use fmt;
 - use fnmatch;
 - use fs;
 - use getopt;
 - use io;
 - use os;
 - use path;
 - use strings;
 - let excludes: []str = []; // fnmatch patterns collected from -e; matching blobs are reported as ignored and kept
 - let noop: bool = false; // -n: detected blobs are only reported, never removed
 - let check: bool = false; // -c: main() exits with status 2 when a non-excluded blob was found
 - let json: bool = false; // -j: every action is also logged through append_action()
 - const beam: []u8 = ['F', 'O', 'R', '1']; // Erlang BEAM, looked for after the leading comment lines of an escript
 - const magic: [_](str, []u8) = [ // (format name, signature) pairs; id_blob() returns the name of the first entry whose signature prefixes the file
 - ("ELF", [0x7F, 'E', 'L', 'F']),
 - ("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']),
 - ("PC-BIOS", [0x55, 0xAA]),
 - ("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']),
 - ("Java .class / Mach-O exec", [0xCA, 0xFE, 0xBA, 0xBE]),
 - ("Mach-O exec", [0xCF, 0xFA, 0xED, 0xFE]),
 - ("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]), // Partial MS-DOS stub header (ECMA-335 Common Language Infrastructure); other PE files are caught by the dedicated check in id_blob()
 - ("Lua bytecode", [0x1B, 'L', 'u', 'a']),
 - ("Wasm", [0x00, 'a', 's', 'm']),
 - ("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']), // Apple Preferred Executable Format
 - ("DTB", [0xD0, 0x0D, 0xFE, 0xED]), // Device Tree Blob (OpenFirmware, u-boot, …)
 - // Python *.pyc bytecode magic numbers (defined in importlib/_bootstrap_external.py)
 - ("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian)
 - ("Python pyc 3.8", [0x55, 0x0D, '\r', '\n']), // (3413i little-endian)
 - ("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i little-endian)
 - ("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i little-endian)
 - ("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i little-endian)
 - ("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i little-endian)
 - ("Python pyc 3.13", [0xF3, 0x0D, '\r', '\n']), // (3571i little-endian)
 - ("Python pyc 3.14", [0x2B, 0x0E, '\r', '\n']), // (3627i little-endian)
 - // Python pickle object data: like Perl Storable, loading it can lead to code execution
 - ("Python Pickle v2", [0x80, 0x02]), // PROTO opcode + protocol version 2 (no frame opcode in this signature)
 - ("Python Pickle v3", [0x80, 0x03]), // PROTO opcode + protocol version 3 (no frame opcode in this signature)
 - ("Python Pickle v4", [0x80, 0x04, 0x95]), // Protocol 4 + start of frame
 - ("Python Pickle v5", [0x80, 0x05, 0x95]), // Protocol 5 + start of frame
 - // https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
 - ("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']),
 - // https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
 - ("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']),
 - ("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']),
 - ("Perl storable v0.7", ['p', 's', 't', '0']),
 - ("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']),
 - ("NekoVM bytecode", ['N', 'E', 'K', 'O']),
 - ("Emacs Lisp bytecode", [';', 'E', 'L', 'C']), // Emacs Lisp bytecode; if false positives turn up, the next 4 bytes are the version and could be matched as well
 - ("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']),
 - ("Ren'Py Archive v1", [0x78, 0x9c]), // NOTE(review): 0x78 0x9C is also the common zlib stream header, so this presumably matches any zlib data — confirm this is intended
 - ("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']),
 - ("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']),
 - ("Squirrel bytecode", [0xFA, 0xFA]),
 - ("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']),
 - // About the entry above: Clang Pre-Compiled-Header magic, followed by Info Block, see:
 - // - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST
 - // - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic
 - // Excluded from fixtures (200KB+), test with:
 - // echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h
 - ("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']),
 - // The GCC entry above is excluded from fixtures (1.2MB+), test with:
 - // echo > empty.h && gcc empty.h
 - ("GCC Rust Metadata", ['G', 'R', 'S', 'T']), // GCC Rust Metadata (*.rox)
 - ("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]),
 - // Why do the two JIT snapshot magics below differ, and is the C one still in use?
 - // No clue, but both are in the SDK repo.
 - ("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]), // Dart JIT snapshot, if done from the C code.
 - ("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]), // Dart JIT snapshot, if done from the Dart code.
 - ];
 - const dos_magic: []u8 = ['M', 'Z']; // start of the MS-DOS stub; triggers the PE lookup in id_blob()
 - const pe_magic: []u8 = ['P', 'E', 0x00, 0x00]; // expected at the little-endian offset stored in bytes 60..64
 - const racket: []u8 = ['r', 'a', 'c', 'k', 'e', 't']; // virtual machine string checked in "#~" files
 - const zip: []u8 = ['P', 'K', 0x03, 0x04]; // start of a PKZIP archive (also looked for inside escripts)
 - const jar: []u8 = [0xFE, 0xCA, 0, 0]; // extra field content seen on the first entry of JAR files
 - const shebang: []u8 = ['#', '!'];
 - const meta_inf_: []u8 = ['M', 'E', 'T', 'A', '-', 'I', 'N', 'F', '/']; // usual start of the first filename in a JAR
 - let found: bool = false; // set by check_dir() as soon as a non-excluded blob is met
 - let json_out: io::handle = 0; // JSON log destination; main() replaces it with the file given to -j
 - // Reports whether [[needle]] is present in [[data]] at exactly [[offset]].
 - // Never slices out of bounds: a needle which does not fit yields false.
 - fn has_at(data: []u8, offset: size, needle: []u8) bool = {
 - 	if (offset > len(data) || len(needle) > len(data) - offset) {
 - 		return false;
 - 	};
 - 	return bytes::equal(data[offset..offset + len(needle)], needle);
 - };
 - // Identifies the executable/bytecode format of [[filename]] from its first
 - // bytes (at most 4096 are read).
 - //
 - // Returns the name of the detected format, void when the file is empty or
 - // nothing matched, fs::error when the file cannot be opened and io::error
 - // when it cannot be read.
 - fn id_blob(filename: str) (void | str | fs::error | io::error) = {
 - 	static let buffer: [4096]u8 = [0...];
 - 	const file = os::open(filename)?;
 - 	defer io::close(file)!;
 - 	// Fill as much of the buffer as the file provides: a single read is
 - 	// allowed to return fewer bytes than requested.
 - 	let nread = 0z;
 - 	for (nread < len(buffer)) {
 - 		match (io::read(file, buffer[nread..])?) {
 - 		case io::EOF =>
 - 			break;
 - 		case let n: size =>
 - 			nread += n;
 - 		};
 - 	};
 - 	if (nread == 0) {
 - 		// empty file
 - 		return void;
 - 	};
 - 	// The buffer is static, so whatever lies past nread belongs to a
 - 	// previously scanned file. Every check below only looks at data.
 - 	const data = buffer[..nread];
 - 	for (const sig .. magic) {
 - 		assert(len(sig.1) > 0);
 - 		if (bytes::hasprefix(data, sig.1)) {
 - 			return sig.0;
 - 		};
 - 	};
 - 	// Special check to detect *all* Microsoft Portable Executable files:
 - 	// bytes 60..64 of the DOS stub hold the offset of the PE signature
 - 	if (len(data) >= 64 && bytes::hasprefix(data, dos_magic)) {
 - 		const pe_offset = endian::legetu32(data[60..64]): size;
 - 		if (has_at(data, pe_offset, pe_magic)) {
 - 			return "WinNT EXE";
 - 		};
 - 	};
 - 	// detect binary escripts (PKZIP archive and BEAM supported)
 - 	if (bytes::hasprefix(data, shebang)) {
 - 		// The shebang line itself is skipped like a comment
 - 		let comment = true;
 - 		for (let i = 0z; i < len(data); i += 1) {
 - 			if (comment) {
 - 				if (data[i] == '\n') comment = false;
 - 				continue;
 - 			};
 - 			if (data[i] == '%') {
 - 				comment = true;
 - 				continue;
 - 			};
 - 			// First bytes after comments
 - 			if (has_at(data, i, zip)) return "Erlang ZIP BEAM";
 - 			if (has_at(data, i, beam)) return "Erlang #! BEAM";
 - 			// source code as script
 - 			break;
 - 		};
 - 	};
 - 	// Special check to detect racket bytecode
 - 	if (len(data) > 2 && bytes::hasprefix(data, ['#', '~'])) {
 - 		// From src/expander/compile/write-linklet.rkt in racket:
 - 		// - #~
 - 		// - length-prefixed version string (ie. "\x038.5")
 - 		// - length-prefixed virtual machine string (ie. "\x06racket")
 - 		// - 'D' / 'B'
 - 		// Here it verifies that the virtual machine string is racket, assuming none other is supported in the wild.
 - 		// Racket itself only matches against '#~' which is small & only printable-ASCII, so too prone to false positives
 - 		//
 - 		// Offsets are computed as size so that a large length byte can
 - 		// neither wrap around nor produce an inverted slice.
 - 		const version_len = data[2]: size;
 - 		const vm_len_at = 3 + version_len;
 - 		if (vm_len_at < len(data)) {
 - 			const vm_len = data[vm_len_at]: size;
 - 			if (vm_len == len(racket) && has_at(data, vm_len_at + 1, racket)) {
 - 				return "Racket";
 - 			};
 - 		};
 - 	};
 - 	if (len(data) >= 0x1E && bytes::hasprefix(data, zip)) {
 - 		// Check first filename
 - 		// Optional in JAR and probably doesn't have to be the first filename
 - 		// but it's how it's usually done
 - 		//
 - 		// So far seen either META-INF/ and \xFE\xCA\x00\x00 in extra or just META-INF/MANIFEST.MF
 - 		//
 - 		// Local file header layout: filename length at 0x1A, extra field
 - 		// length at 0x1C, filename at 0x1E, extra field right after it.
 - 		const fname_len = endian::legetu16(data[0x1A..0x1C]): size;
 - 		if (fname_len < 256) {
 - 			const fname_start: size = 0x1E;
 - 			if (fname_len >= len(meta_inf_) && has_at(data, fname_start, meta_inf_)) {
 - 				return "Java JAR";
 - 			};
 - 			const extra_len = endian::legetu16(data[0x1C..0x1E]): size;
 - 			if (extra_len == len(jar) && has_at(data, fname_start + fname_len, jar)) {
 - 				return "Java JAR";
 - 			};
 - 		};
 - 	};
 - 	return void;
 - };
 - // Checks id_blob() against the fixtures: source files must not be detected
 - // at all, and every blob must be reported under its expected format name.
 - @test fn id_blob() void = {
 - 	// Plain source files (and an empty file): none may be identified
 - 	const sources = [
 - 		"test/fixtures/empty",
 - 		"test/fixtures/empty.dts",
 - 		"test/fixtures/hello-dart.dart",
 - 		"test/fixtures/hello-ocaml.ml",
 - 		"test/fixtures/hello-racket.rkt",
 - 		"test/fixtures/hello.1",
 - 		"test/fixtures/hello.c",
 - 		"test/fixtures/hello.cs",
 - 		"test/fixtures/hello.el",
 - 		"test/fixtures/hello.erl",
 - 		"test/fixtures/hello.erl.escript",
 - 		"test/fixtures/hello.java",
 - 		"test/fixtures/hello.lua",
 - 		"test/fixtures/hello.neko",
 - 		"test/fixtures/hello.nqp",
 - 		"test/fixtures/hello.nut",
 - 		"test/fixtures/hello.pir",
 - 		"test/fixtures/hello.py",
 - 		"test/fixtures/hello.wat",
 - 		"test/fixtures/perl_storage.pm",
 - 	];
 - 	for (const src .. sources) {
 - 		const verdict = id_blob(src)!;
 - 		if (verdict is str) {
 - 			fmt::fatalf(
 - 				"deblob: error: id_blob({}) got wrongly detected as: {}",
 - 				src, verdict as str
 - 			);
 - 		};
 - 	};
 - 	// (expected format name, fixture path)
 - 	const blobs = [
 - 		("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"),
 - 		("DTB", "test/fixtures/empty.dtb"),
 - 		("ELF", "test/fixtures/hello"),
 - 		("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"),
 - 		("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"),
 - 		("Unix ar(1)", "test/fixtures/hello-ocaml.a"),
 - 		("OCaml", "test/fixtures/hello-ocaml.cma"),
 - 		("OCaml", "test/fixtures/hello-ocaml.cmi"),
 - 		("OCaml", "test/fixtures/hello-ocaml.cmo"),
 - 		("OCaml", "test/fixtures/hello-ocaml.cmx"),
 - 		("OCaml", "test/fixtures/hello-ocaml.cmxa"),
 - 		("ELF", "test/fixtures/hello-ocaml.o"),
 - 		("Unix ar(1)", "test/fixtures/hello.a"),
 - 		("Erlang FOR1 BEAM", "test/fixtures/hello.beam"),
 - 		("Erlang #! BEAM", "test/fixtures/hello.beam.escript"),
 - 		("Java .class / Mach-O exec", "test/fixtures/hello.class"),
 - 		("Squirrel bytecode", "test/fixtures/hello.cnut"),
 - 		("Emacs Lisp bytecode", "test/fixtures/hello.elc"),
 - 		("WinNT EXE", "test/fixtures/hello.exe"),
 - 		("Java JAR", "test/fixtures/hello.jar"),
 - 		("Lua bytecode", "test/fixtures/hello.luac53"),
 - 		("Lua bytecode", "test/fixtures/hello.luac54"),
 - 		("NekoVM bytecode", "test/fixtures/hello.n"),
 - 		("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"),
 - 		("ELF", "test/fixtures/hello.o"),
 - 		("Parrot bytecode", "test/fixtures/hello.pir.pbc"),
 - 		("Wasm", "test/fixtures/hello.wasm"),
 - 		("WinNT EXE", "test/fixtures/monodx.dll"),
 - 		("Perl storable v0.7", "test/fixtures/perl_storage.pst"),
 - 		("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"),
 - 		("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"),
 - 		("Apple PEF", "test/fixtures/qemu_vga.ndrv"),
 - 		//("", "test/fixtures/option.rom"),
 - 		("Mach-O exec", "test/fixtures/macos-arm64.o"),
 - 	];
 - 	for (const blob .. blobs) {
 - 		const (expected, fixture) = blob;
 - 		const detected = match (id_blob(fixture)!) {
 - 		case let s: str =>
 - 			yield s;
 - 		case void =>
 - 			fmt::fatalf(
 - 				"deblob: error: id_blob({}) didn't got detected as: {}",
 - 				fixture, expected
 - 			);
 - 		};
 - 		if (detected != expected) {
 - 			fmt::fatalf(
 - 				"deblob: error: id_blob({}) got identified as \"{}\" instead of \"{}\"",
 - 				fixture, detected, expected
 - 			);
 - 		};
 - 	};
 - };
 - // Reports whether [[filename]] matches at least one of the exclude patterns
 - // given on the command line (fnmatch syntax, no flags).
 - fn is_excluded(filename: str) bool = {
 - 	for (const pattern .. excludes) {
 - 		if (fnmatch::fnmatch(pattern, filename, fnmatch::flag::NONE)) return true;
 - 	};
 - 	return false;
 - };
 - // Appends one object with the "action", "path" and "format" keys to the JSON
 - // output, as a further element of the array opened by main(). Does nothing
 - // unless JSON output was requested.
 - //
 - // Returns nomem when the temporary JSON object cannot be populated. A failed
 - // write to the JSON output aborts the program.
 - fn append_action(action: str, filename: str, format: str) (void | nomem) = {
 - 	if (!json) return;
 - 	let obj = json::object { ... };
 - 	// Deferred first so that it runs last: once the takes below have removed
 - 	// the borrowed strings again, this releases the storage owned by the
 - 	// object itself, which would otherwise leak on every call.
 - 	defer json::finish(obj);
 - 	// The values are the caller's strings, hence each put is paired with a
 - 	// take which gets them out of the object before it is finished.
 - 	json::put(&obj, "action", action)?;
 - 	defer json::take(&obj, "action");
 - 	json::put(&obj, "path", filename)?;
 - 	defer json::take(&obj, "path");
 - 	json::put(&obj, "format", format)?;
 - 	defer json::take(&obj, "format");
 - 	let obj_s = json::dumpstr(obj);
 - 	defer free(obj_s);
 - 	// Elements are comma-separated, so only the very first one is written
 - 	// without a leading comma.
 - 	static let first_obj: bool = true;
 - 	if (first_obj) {
 - 		fmt::fprintf(json_out, "\n\t{}", obj_s)!;
 - 		first_obj = false;
 - 	} else {
 - 		fmt::fprintf(json_out, ",\n\t{}", obj_s)!;
 - 	};
 - };
 - // Recursively scans [[dirname]]. Sub-directories are descended into, regular
 - // files are identified with id_blob() and any other kind of entry is skipped.
 - // An identified blob is either ignored (it matches an exclude pattern), only
 - // reported (noop mode) or removed; the chosen action is printed and handed to
 - // append_action().
 - //
 - // Returns errors::invalid when a directory cannot be opened for iteration;
 - // nomem and io::error are propagated from the calls made along the way.
 - fn check_dir(dirname: str) (void | nomem | errors::invalid | io::error) = {
 - 	const dir = match (os::iter(dirname)) {
 - 	case let e: fs::error =>
 - 		fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(e))!;
 - 		return errors::invalid;
 - 	case let it: *fs::iterator =>
 - 		yield it;
 - 	};
 - 	defer fs::finish(dir);
 - 	for (true) {
 - 		const entry: fs::dirent = match (fs::next(dir)) {
 - 		case done =>
 - 			break;
 - 		case let e: fs::error =>
 - 			// The rest of this directory is given up on, without
 - 			// failing the whole walk
 - 			fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(e))?;
 - 			break;
 - 		case let d: fs::dirent =>
 - 			yield d;
 - 		};
 - 		const full_path = path::init(dirname, entry.name)!;
 - 		const filename = path::string(&full_path);
 - 		if (fs::isdir(entry.ftype)) {
 - 			check_dir(filename)?;
 - 			continue;
 - 		};
 - 		if (!fs::isfile(entry.ftype)) {
 - 			// ignore non-(dir/regular-file) like symlinks, blocks, fifo, …
 - 			continue;
 - 		};
 - 		const format = match (id_blob(filename)) {
 - 		case let name: str =>
 - 			yield name;
 - 		case void =>
 - 			// not a known blob
 - 			continue;
 - 		case let e: fs::error =>
 - 			fmt::errorfln("deblob: error: Failed opening {}: {}",
 - 				filename, fs::strerror(e))!;
 - 			continue;
 - 		case let e: io::error =>
 - 			fmt::errorfln("deblob: error: Failed reading {}: {}",
 - 				filename, io::strerror(e))!;
 - 			continue;
 - 		};
 - 		if (is_excluded(filename)) {
 - 			append_action("ignoring", filename, format)?;
 - 			fmt::printfln("ignoring {}:\t{}", format, filename)!;
 - 			continue;
 - 		};
 - 		// Only non-excluded blobs count for the -c exit status
 - 		found = true;
 - 		if (noop) {
 - 			append_action("detected", filename, format)?;
 - 			fmt::printfln("detected {}:\t{}", format, filename)!;
 - 		} else {
 - 			append_action("removing", filename, format)?;
 - 			fmt::printfln("removing {}:\t{}", format, filename)!;
 - 			match (os::remove(filename)) {
 - 			case let e: fs::error =>
 - 				fmt::errorfln("deblob: error: Failed removing file '{}':\t{}",
 - 					filename, fs::strerror(e))!;
 - 			case void =>
 - 				void;
 - 			};
 - 		};
 - 	};
 - };
 - // Runs check_dir() on a fixture directory and compares the number of entries
 - // before (63) and after (31) the run.
 - // Beware: check_dir() really removes the detected files here, so the fixture
 - // directory has to be in its pristine state when the test starts.
 - @test fn check_dir() void = {
 - 	const dirname = "test/check_dir-fixtures";
 - 	const files_before = match (os::readdir(dirname)) {
 - 	case let d: []fs::dirent =>
 - 		yield d;
 - 	case let e: fs::error =>
 - 		fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
 - 	case nomem =>
 - 		fmt::fatalf("deblob: error: os::readdir({}): Out of Memory", dirname);
 - 	};
 - 	defer fs::dirents_free(files_before);
 - 	const files_before_exp = 63z;
 - 	if (len(files_before) != files_before_exp) {
 - 		fmt::fatalf("deblob: expected {} in files_before, got {}\n", files_before_exp, len(files_before));
 - 	};
 - 	const ret = check_dir(dirname);
 - 	assert(ret is void);
 - 	const files_after = match (os::readdir(dirname)) {
 - 	case let d: []fs::dirent =>
 - 		yield d;
 - 	case let e: fs::error =>
 - 		fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
 - 	case nomem =>
 - 		fmt::fatalf("deblob: error: os::readdir({}): Out of Memory", dirname);
 - 	};
 - 	defer fs::dirents_free(files_after);
 - 	const files_after_exp = 31z;
 - 	if (len(files_after) != files_after_exp) {
 - 		// This message used to name files_before, pointing at the wrong check
 - 		fmt::fatalf("deblob: expected {} in files_after, got {}\n", files_after_exp, len(files_after));
 - 	};
 - };
 - // Entry point: parses the command line, opens the JSON log when -j is given,
 - // changes to the directory given to -d, then scans "." with check_dir().
 - //
 - // Exits with 0 when the scan succeeded, 2 when -c is set and a non-excluded
 - // blob was found, and 1 when check_dir() reported errors::invalid or an I/O
 - // error.
 - export fn main() void = {
 - 	const cmd = getopt::parse(os::args,
 - 		"Remove binary executable files",
 - 		('c', "Return error if any non-excluded blobs were found"),
 - 		('e', "NAME", "Exclude filename from removal (defaults to none)"),
 - 		('d', "PATH", "Set working directory (default to current dir)"),
 - 		('j', "PATH", "JSON output file"),
 - 		('n', "No actual removal, only scan and log"),
 - 	);
 - 	defer getopt::finish(&cmd);
 - 	defer free(excludes);
 - 	let opt_d = "";
 - 	let json_out_path = "";
 - 	for (const opt .. cmd.opts) {
 - 		switch (opt.0) {
 - 		case 'c' =>
 - 			check = true;
 - 		case 'e' =>
 - 			append(excludes, opt.1)!;
 - 		case 'd' =>
 - 			opt_d = opt.1;
 - 		case 'n' =>
 - 			noop = true;
 - 		case 'j' =>
 - 			json = true;
 - 			json_out_path = opt.1;
 - 		case =>
 - 			fmt::fatalf("deblob: error: Unhandled option -{}", opt.0);
 - 		};
 - 	};
 - 	// Keyed on the json flag rather than on the path being non-empty:
 - 	// append_action() writes whenever the flag is set, so an empty -j
 - 	// argument must fail here instead of leaving json_out on handle 0.
 - 	if (json) {
 - 		json_out = match (os::create(json_out_path, fs::mode::USER_RW | fs::mode::GROUP_R | fs::mode::OTHER_R)) {
 - 		case let f: io::file =>
 - 			yield f;
 - 		case let e: fs::error =>
 - 			fmt::fatalf("deblob: error: Failed creating/opening file '{}' for JSON output: {}", json_out_path, fs::strerror(e));
 - 		};
 - 		fmt::fprint(json_out, "[")!;
 - 	};
 - 	// The JSON file is opened before this, so a relative -j path is
 - 	// resolved against the original working directory.
 - 	if (opt_d != "") {
 - 		match (os::chdir(opt_d)) {
 - 		case let e: fs::error =>
 - 			fmt::fatalf("deblob: error: Failed changing current directory to '{}': {}", opt_d, fs::strerror(e));
 - 		case void =>
 - 			void;
 - 		};
 - 	};
 - 	fmt::println(":: Checking for blobs")!;
 - 	const ret = check_dir(".");
 - 	fmt::println(":: Done checking for blobs")!;
 - 	// The array is terminated whatever check_dir() returned
 - 	if (json) {
 - 		fmt::fprint(json_out, "\n]")!;
 - 		match (io::close(json_out)) {
 - 		case void =>
 - 			void;
 - 		case let e: io::error =>
 - 			fmt::fatalf("deblob: error: Failed closing JSON output file '{}': {}", json_out_path, io::strerror(e));
 - 		};
 - 	};
 - 	match (ret) {
 - 	case void =>
 - 		if (check && found) os::exit(2);
 - 		os::exit(0);
 - 	case errors::invalid =>
 - 		os::exit(1);
 - 	case nomem =>
 - 		fmt::fatal("deblob: error: Out of Memory");
 - 	case let e: io::error =>
 - 		fmt::errorfln("deblob: error: I/O error while traversing directories: {}", io::strerror(e))!;
 - 		os::exit(1);
 - 	};
 - };