logo

deblob

remove binary executables from a directory git clone https://anongit.hacktivis.me/git/deblob.git/

main.ha (16165B)


  1. // Copyright © 2019 deblob Authors <https://hacktivis.me/projects/deblob>
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. use bytes;
  4. use encoding::json;
  5. use endian;
  6. use errors;
  7. use fmt;
  8. use fnmatch;
  9. use fs;
  10. use getopt;
  11. use io;
  12. use os;
  13. use path;
  14. use strings;
// fnmatch patterns collected from the -e option; is_excluded matches file
// paths against them, and matching blobs are reported as "ignoring" instead
// of being removed.
let excludes: []str = [];
// Set by the -n option: check_dir reports blobs as "detected" and does not
// remove them.
let noop: bool = false;
// Set by the -c option: main exits with status 2 when a non-excluded blob
// was found.
let check: bool = false;
// Set by the -j option: append_action returns without writing anything
// unless this is true.
let json: bool = false;
// Signature that id_blob looks for right after the leading comment lines of
// a "#!" script.
const beam: []u8 = ['F', 'O', 'R', '1']; // Erlang BEAM
// Table of (format name, signature) pairs. id_blob walks it in order and
// returns the name of the first entry whose signature is a prefix of the
// file's leading bytes, so earlier entries win over later ones.
const magic: [_](str, []u8) = [
	("ELF", [0x7F, 'E', 'L', 'F']),
	("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']),
	("PC-BIOS", [0x55, 0xAA]),
	("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']),
	("Java .class / Mach-O exec", [0xCA, 0xFE, 0xBA, 0xBE]),
	("Mach-O exec", [0xCF, 0xFA, 0xED, 0xFE]),
	("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]), // Partial MSDOS stub header (ECMA-335 Common Language Infrastructure)
	("Lua bytecode", [0x1B, 'L', 'u', 'a']),
	("Wasm", [0x00, 'a', 's', 'm']),
	("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']), // Apple Preferred Executable Format
	("DTB", [0xD0, 0x0D, 0xFE, 0xED]), // Device Tree Blob (OpenFirmware, u-boot, …)
	// Python *.pyc bytecode magic numbers (defined in importlib/_bootstrap_external.py)
	("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian)
	("Python pyc 3.8", [0x55, 0x0D, '\r', '\n']), // (3413i little-endian)
	("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i little-endian)
	("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i little-endian)
	("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i little-endian)
	("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i little-endian)
	("Python pyc 3.13", [0xF3, 0x0D, '\r', '\n']), // (3571i little-endian)
	("Python pyc 3.14", [0x2B, 0x0E, '\r', '\n']), // (3627i little-endian)
	// Python pickle object data: like Perl Storable, it is dangerous enough to cause code execution
	("Python Pickle v2", [0x80, 0x02]), // Protocol 2 + start of frame
	("Python Pickle v3", [0x80, 0x03]), // Protocol 3 + start of frame
	("Python Pickle v4", [0x80, 0x04, 0x95]), // Protocol 4 + start of frame
	("Python Pickle v5", [0x80, 0x05, 0x95]), // Protocol 5 + start of frame
	// https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
	("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']),
	// https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
	("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']),
	("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']),
	("Perl storable v0.7", ['p', 's', 't', '0']),
	("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']),
	("NekoVM bytecode", ['N', 'E', 'K', 'O']),
	("Emacs Lisp bytecode", [';', 'E', 'L', 'C']), // Emacs Lisp bytecode; if false positives show up, the next 4 bytes are the version
	("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']),
	("Ren'Py Archive v1", [0x78, 0x9c]),
	("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']),
	("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']),
	("Squirrel bytecode", [0xFA, 0xFA]),
	("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']),
	// Clang Pre-Compiled-Header, followed by Info Block, see:
	// - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST
	// - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic
	// Excluded from fixtures (200KB+), test with:
	// echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h
	("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']),
	// Excluded from fixtures (1.2MB+), test with:
	// echo > empty.h && gcc empty.h
	("GCC Rust Metadata", ['G', 'R', 'S', 'T']), // GCC Rust Metadata (*.rox)
	("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]),
	// why are these 2 different, and is the C one still in use?
	// no clue, but both are in the sdk repo.
	("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]), // Dart JIT snapshot, if done from the C code.
	("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]), // Dart JIT snapshot, if done from the Dart code.
];
// Leading bytes id_blob requires before it goes looking for a PE signature.
const dos_magic: []u8 = ['M', 'Z'];
// Signature id_blob expects at the offset it reads from bytes 60..64 of an
// "MZ" file.
const pe_magic: []u8 = ['P', 'E', 0x00, 0x00];
// Virtual machine string id_blob expects inside a "#~" file.
const racket: []u8 = ['r', 'a', 'c', 'k', 'e', 't'];
// Signature checked both at the start of a file (JAR detection) and right
// after the comment lines of a "#!" script (escript detection).
const zip: []u8 = ['P', 'K', 0x03, 0x04];
// Bytes id_blob compares against the extra field of the first ZIP entry.
const jar: []u8 = [0xFE, 0xCA, 0, 0];
// Prefix that makes id_blob scan a file as a possible binary escript.
const shebang: []u8 = ['#', '!'];
// Bytes id_blob compares against the start of the first ZIP entry's filename.
const meta_inf_: []u8 = ['M', 'E', 'T', 'A', '-', 'I', 'N', 'F', '/'];
// Set to true by check_dir once a non-excluded blob has been identified;
// main reads it to pick the exit status when -c was given.
let found: bool = false;
// Handle receiving the JSON report; main assigns it from os::create when -j
// was given, until then it holds the placeholder value 0.
let json_out: io::handle = 0;
  85. fn id_blob(filename: str) (void | str | fs::error | io::error) = {
  86. static let buffer: [4096]u8 = [0...];
  87. const file = os::open(filename)?;
  88. defer io::close(file)!;
  89. if (io::read(file, buffer)? is io::EOF) {
  90. // empty file
  91. return void;
  92. };
  93. for (let i = 0z; i < len(magic); i += 1) {
  94. assert(len(magic[i].1) > 0);
  95. if (bytes::hasprefix(buffer, magic[i].1)) {
  96. return magic[i].0;
  97. };
  98. };
  99. // Special check to detect *all* Microsoft Portable Executable files
  100. if (bytes::hasprefix(buffer, dos_magic)) {
  101. const pe_offset = endian::legetu32(buffer[60..64]);
  102. if ((pe_offset <= 4096-4) && bytes::hasprefix(buffer[pe_offset..pe_offset+4], pe_magic)) {
  103. return "WinNT EXE";
  104. };
  105. };
  106. // detect binary escripts (PKZIP archive and BEAM supported)
  107. if (bytes::hasprefix(buffer, shebang)) {
  108. let comment = true;
  109. for (let i = 0z; i < 4096; i += 1) {
  110. if(comment) {
  111. if(buffer[i] == '\n') comment = false;
  112. continue;
  113. };
  114. if(buffer[i] == '%') {
  115. comment = true;
  116. } else {
  117. // First bytes after comments
  118. if(bytes::equal(zip, buffer[i..i+len(zip)])) return "Erlang ZIP BEAM";
  119. if(bytes::equal(beam, buffer[i..i+len(beam)])) return "Erlang #! BEAM";
  120. // source code as script
  121. break;
  122. };
  123. };
  124. };
  125. // Special check to detect racket bytecode
  126. if (bytes::hasprefix(buffer, ['#', '~'])) {
  127. // From src/expander/compile/write-linklet.rkt in racket:
  128. // - #~
  129. // - length-prefixed version string (ie. "\x038.5")
  130. // - length-prefixed virtual machine string (ie. "\x06racket")
  131. // - 'D' / 'B'
  132. // Here it verifies that the virtual machine string is racket, assuming none other is supported in the wild.
  133. // Racket itself only matches against '#~' which is small & only printable-ASCII, so too prone to false positives
  134. const version_len = buffer[2];
  135. const version_end = 2+version_len;
  136. const racket_len = buffer[version_end+1];
  137. const racket_start = version_end+2;
  138. if(bytes::equal(racket, buffer[racket_start..racket_start+racket_len])) {
  139. return "Racket";
  140. };
  141. };
  142. if (bytes::hasprefix(buffer, zip)) {
  143. // Check first filename
  144. // Optional in JAR and probably doesn't have to be the first filename
  145. // but it's how it's usually done
  146. //
  147. // So far seen either META-INF/ and \xFE\xCA\x00\x00 in extra or just META-INF/MANIFEST.MF
  148. const fname_len = endian::legetu16(buffer[0x1A..0x1C]);
  149. if(fname_len < 256)
  150. {
  151. const fname_start = 0x1Eu16;
  152. if(fname_len >= 9 && bytes::equal(meta_inf_, buffer[fname_start..fname_start+9])) {
  153. return "Java JAR";
  154. };
  155. const extra_start = fname_start+fname_len;
  156. const extra_len = endian::legetu16(buffer[0x1D..0x1F]);
  157. if(extra_len == 4 && bytes::equal(jar, buffer[extra_start..extra_start+extra_len]))
  158. {
  159. return "Java JAR";
  160. };
  161. };
  162. };
  163. return void;
  164. };
  165. @test fn id_blob() void = {
  166. const sources = [
  167. "test/fixtures/empty",
  168. "test/fixtures/empty.dts",
  169. "test/fixtures/hello-dart.dart",
  170. "test/fixtures/hello-ocaml.ml",
  171. "test/fixtures/hello-racket.rkt",
  172. "test/fixtures/hello.1",
  173. "test/fixtures/hello.c",
  174. "test/fixtures/hello.cs",
  175. "test/fixtures/hello.el",
  176. "test/fixtures/hello.erl",
  177. "test/fixtures/hello.erl.escript",
  178. "test/fixtures/hello.java",
  179. "test/fixtures/hello.lua",
  180. "test/fixtures/hello.neko",
  181. "test/fixtures/hello.nqp",
  182. "test/fixtures/hello.nut",
  183. "test/fixtures/hello.pir",
  184. "test/fixtures/hello.py",
  185. "test/fixtures/hello.wat",
  186. "test/fixtures/perl_storage.pm",
  187. ];
  188. for (let i = 0z; i < len(sources); i += 1) {
  189. match(id_blob(sources[i])!) {
  190. case void =>
  191. continue;
  192. case let s: str =>
  193. fmt::fatalf(
  194. "deblob: error: id_blob({}) got wrongly detected as: {}",
  195. sources[i], s
  196. );
  197. };
  198. };
  199. const blobs = [
  200. ("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"),
  201. ("DTB", "test/fixtures/empty.dtb"),
  202. ("ELF", "test/fixtures/hello"),
  203. ("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"),
  204. ("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"),
  205. ("Unix ar(1)", "test/fixtures/hello-ocaml.a"),
  206. ("OCaml", "test/fixtures/hello-ocaml.cma"),
  207. ("OCaml", "test/fixtures/hello-ocaml.cmi"),
  208. ("OCaml", "test/fixtures/hello-ocaml.cmo"),
  209. ("OCaml", "test/fixtures/hello-ocaml.cmx"),
  210. ("OCaml", "test/fixtures/hello-ocaml.cmxa"),
  211. ("ELF", "test/fixtures/hello-ocaml.o"),
  212. ("Unix ar(1)", "test/fixtures/hello.a"),
  213. ("Erlang FOR1 BEAM", "test/fixtures/hello.beam"),
  214. ("Erlang #! BEAM", "test/fixtures/hello.beam.escript"),
  215. ("Java .class / Mach-O exec", "test/fixtures/hello.class"),
  216. ("Squirrel bytecode", "test/fixtures/hello.cnut"),
  217. ("Emacs Lisp bytecode", "test/fixtures/hello.elc"),
  218. ("WinNT EXE", "test/fixtures/hello.exe"),
  219. ("Java JAR", "test/fixtures/hello.jar"),
  220. ("Lua bytecode", "test/fixtures/hello.luac53"),
  221. ("Lua bytecode", "test/fixtures/hello.luac54"),
  222. ("NekoVM bytecode", "test/fixtures/hello.n"),
  223. ("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"),
  224. ("ELF", "test/fixtures/hello.o"),
  225. ("Parrot bytecode", "test/fixtures/hello.pir.pbc"),
  226. ("Wasm", "test/fixtures/hello.wasm"),
  227. ("WinNT EXE", "test/fixtures/monodx.dll"),
  228. ("Perl storable v0.7", "test/fixtures/perl_storage.pst"),
  229. ("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"),
  230. ("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"),
  231. ("Apple PEF", "test/fixtures/qemu_vga.ndrv"),
  232. //("", "test/fixtures/option.rom"),
  233. ("Mach-O exec", "test/fixtures/macos-arm64.o"),
  234. ];
  235. for (let i = 0z; i < len(blobs); i += 1) {
  236. match(id_blob(blobs[i].1)!) {
  237. case void =>
  238. fmt::fatalf(
  239. "deblob: error: id_blob({}) didn't got detected as: {}",
  240. blobs[i].1, blobs[i].0
  241. );
  242. case let s: str =>
  243. if(s != blobs[i].0)
  244. {
  245. fmt::fatalf(
  246. "deblob: error: id_blob({}) got identified as \"{}\" instead of \"{}\"",
  247. blobs[i].1, s, blobs[i].0
  248. );
  249. };
  250. };
  251. };
  252. };
  253. fn is_excluded(filename: str) bool = {
  254. for (let i = 0z; i < len(excludes); i += 1) {
  255. if (fnmatch::fnmatch(excludes[i], filename, fnmatch::flag::NONE)) {
  256. return true;
  257. };
  258. };
  259. return false;
  260. };
  261. fn append_action(action: str, filename: str, format: str) (void | nomem) = {
  262. if(!json) return;
  263. let obj = json::object { ... };
  264. json::put(&obj, "action", action)?;
  265. defer json::take(&obj, "action");
  266. json::put(&obj, "path", filename)?;
  267. defer json::take(&obj, "path");
  268. json::put(&obj, "format", format)?;
  269. defer json::take(&obj, "format");
  270. let obj_s = json::dumpstr(obj);
  271. defer free(obj_s);
  272. static let first_obj: bool = true;
  273. if(first_obj)
  274. {
  275. fmt::fprintf(json_out, "\n\t{}", obj_s)!;
  276. first_obj = false;
  277. }
  278. else
  279. {
  280. fmt::fprintf(json_out, ",\n\t{}", obj_s)!;
  281. };
  282. };
  283. fn check_dir(dirname: str) (void | nomem | errors::invalid | io::error) = {
  284. const iter = match (os::iter(dirname)) {
  285. case let iter: *fs::iterator =>
  286. yield iter;
  287. case let err: fs::error =>
  288. fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(err))!;
  289. return errors::invalid;
  290. };
  291. defer fs::finish(iter);
  292. for (true) {
  293. const ent: fs::dirent = match (fs::next(iter)) {
  294. case let ent: fs::dirent =>
  295. yield ent;
  296. case let e: fs::error =>
  297. fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(e))?;
  298. break;
  299. case done =>
  300. break;
  301. };
  302. const filename_path = path::init(dirname, ent.name)!;
  303. const filename = path::string(&filename_path);
  304. if (fs::isdir(ent.ftype)) {
  305. check_dir(filename)?;
  306. } else if(fs::isfile(ent.ftype)) {
  307. const blob_id = match (id_blob(filename)) {
  308. case void =>
  309. continue;
  310. case let s: str =>
  311. yield s;
  312. case let err: fs::error =>
  313. fmt::errorfln("deblob: error: Failed opening {}: {}",
  314. filename, fs::strerror(err))!;
  315. continue;
  316. case let err: io::error =>
  317. fmt::errorfln("deblob: error: Failed reading {}: {}",
  318. filename, io::strerror(err))!;
  319. continue;
  320. };
  321. if (is_excluded(filename)) {
  322. append_action("ignoring", filename, blob_id)?;
  323. fmt::printfln("ignoring {}:\t{}", blob_id, filename)!;
  324. continue;
  325. };
  326. found = true;
  327. if (noop) {
  328. append_action("detected", filename, blob_id)?;
  329. fmt::printfln("detected {}:\t{}", blob_id, filename)!;
  330. continue;
  331. };
  332. append_action("removing", filename, blob_id)?;
  333. fmt::printfln("removing {}:\t{}", blob_id, filename)!;
  334. match (os::remove(filename)) {
  335. case void =>
  336. continue;
  337. case let e: fs::error =>
  338. fmt::errorfln("deblob: error: Failed removing file '{}':\t{}",
  339. filename, fs::strerror(e))!;
  340. };
  341. } else {
  342. // ignore non-(dir/regular-file) like symlinks, blocks, fifo, …
  343. continue;
  344. };
  345. };
  346. };
  347. @test fn check_dir() void = {
  348. const dirname = "test/check_dir-fixtures";
  349. const files_before = match (os::readdir(dirname)) {
  350. case let d: []fs::dirent =>
  351. yield d;
  352. case let e: fs::error =>
  353. fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
  354. case nomem =>
  355. fmt::fatalf("deblob: error: os::readdir({}): Out of Memory", dirname);
  356. };
  357. const files_before_exp = 63z;
  358. if(len(files_before) != files_before_exp)
  359. {
  360. fmt::fatalf("deblob: expected {} in files_before, got {}\n", files_before_exp, len(files_before));
  361. };
  362. const ret = check_dir(dirname);
  363. assert(ret is void);
  364. const files_after = match (os::readdir(dirname)) {
  365. case let d: []fs::dirent =>
  366. yield d;
  367. case let e: fs::error =>
  368. fmt::fatalf("deblob: error: os::readdir({}): {}", dirname, fs::strerror(e));
  369. case nomem =>
  370. fmt::fatalf("deblob: error: os::readdir({}): Out of Memory", dirname);
  371. };
  372. const files_after_exp = 31z;
  373. if(len(files_after) != files_after_exp)
  374. {
  375. fmt::fatalf("deblob: expected {} in files_before, got {}\n", files_after_exp, len(files_after));
  376. };
  377. };
  378. export fn main() void = {
  379. const cmd = getopt::parse(os::args,
  380. "Remove binary executable files",
  381. ('c', "Return error if any non-excluded blobs were found"),
  382. ('e', "NAME", "Exclude filename from removal (defaults to none)"),
  383. ('d', "PATH", "Set working directory (default to current dir)"),
  384. ('j', "PATH", "JSON output file"),
  385. ('n', "No actual removal, only scan and log"),
  386. );
  387. defer getopt::finish(&cmd);
  388. defer free(excludes);
  389. let opt_d = "";
  390. let json_out_path = "";
  391. for (let i = 0z; i < len(cmd.opts); i += 1) {
  392. const opt = cmd.opts[i];
  393. switch (opt.0) {
  394. case 'c' =>
  395. check = true;
  396. case 'e' =>
  397. append(excludes, opt.1)!;
  398. case 'd' =>
  399. opt_d = opt.1;
  400. case 'n' =>
  401. noop = true;
  402. case 'j' =>
  403. json = true;
  404. json_out_path = opt.1;
  405. case =>
  406. fmt::fatalf("deblob: error: Unhandled option -{}", opt.0);
  407. };
  408. };
  409. if(json_out_path != "")
  410. {
  411. json_out = match (os::create(json_out_path, fs::mode::USER_RW | fs::mode::GROUP_R | fs::mode::OTHER_R)) {
  412. case let f: io::file =>
  413. yield f;
  414. case let e: fs::error =>
  415. fmt::fatalf("deblob: error: Failed creating/opening file '{}' for JSON output: {}", json_out_path, fs::strerror(e));
  416. };
  417. fmt::fprint(json_out, "[")!;
  418. };
  419. if(opt_d != "")
  420. {
  421. match (os::chdir(opt_d)) {
  422. case let e: fs::error =>
  423. fmt::fatalf("deblob: error: Failed changing current directory to '{}': {}", opt_d, fs::strerror(e));
  424. case void =>
  425. void;
  426. };
  427. };
  428. fmt::println(":: Checking for blobs")!;
  429. const ret = check_dir(".");
  430. fmt::println(":: Done checking for blobs")!;
  431. if(json_out_path != "")
  432. {
  433. fmt::fprint(json_out, "\n]")!;
  434. match(io::close(json_out)) {
  435. case void =>
  436. void;
  437. case let e: io::error =>
  438. fmt::fatalf("deblob: error: Failed closing JSON output file '{}': {}", json_out_path, io::strerror(e));
  439. };
  440. };
  441. match (ret) {
  442. case void =>
  443. if(check && found) os::exit(2);
  444. os::exit(0);
  445. case errors::invalid =>
  446. os::exit(1);
  447. case nomem =>
  448. fmt::fatal("deblob: error: Out of Memory");
  449. case let e: io::error =>
  450. fmt::errorfln("deblob: error: I/O error while traversing directories: {}", io::strerror(e))!;
  451. os::exit(1);
  452. };
  453. };