logo

deblob

remove binary executables from a directory git clone https://anongit.hacktivis.me/git/deblob.git/

main.ha (14763B)


  1. // Copyright © 2019 deblob Authors <https://hacktivis.me/projects/deblob>
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. use bytes;
  4. use encoding::json;
  5. use endian;
  6. use errors;
  7. use fmt;
  8. use fnmatch;
  9. use fs;
  10. use getopt;
  11. use io;
  12. use os;
  13. use path;
  14. use strings;
  // Patterns collected from every -e option in main(); is_excluded() matches
  // file names against them with fnmatch.
  let excludes: []str = [];
  // Set by -n: check_dir() reports detected blobs but does not remove them.
  let noop: bool = false;
  // Set by -c: main() exits with status 2 when a non-excluded blob was found.
  let check: bool = false;
  // Set by -j: append_action() only emits JSON records while this is true.
  let json: bool = false;
  // Header looked for by id_blob() right after the leading comment lines of a
  // "#!" escript.
  const beam: []u8 = ['F', 'O', 'R', '1']; // Erlang BEAM
  // Table of (format name, magic bytes) pairs. id_blob() walks it in order and
  // returns the name of the first entry whose bytes are a prefix of the file,
  // so when two entries could both match, the earlier one wins.
  const magic: [_](str, []u8) = [
  	("ELF", [0x7F, 'E', 'L', 'F']),
  	("Unix ar(1)", ['!', '<', 'a', 'r', 'c', 'h', '>', '\n']),
  	("PC-BIOS", [0x55, 0xAA]),
  	("Erlang FOR1 BEAM", ['F', 'O', 'R', '1']),
  	("Java .class / Mach-O exec", [0xCA, 0xFE, 0xBA, 0xBE]),
  	("Mach-O exec", [0xCF, 0xFA, 0xED, 0xFE]),
  	("WinNT EXE", ['M', 'Z', 0x90, 0x00, 0x03, 0x00, 0x00, 0x00]), // Partial MS-DOS stub header (ECMA-335 Common Language Infrastructure)
  	("Lua bytecode", [0x1B, 'L', 'u', 'a']),
  	("Wasm", [0x00, 'a', 's', 'm']),
  	("Apple PEF", ['J', 'o', 'y', '!', 'p', 'e', 'f', 'f']), // Apple Preferred Executable Format
  	("DTB", [0xD0, 0x0D, 0xFE, 0xED]), // Device Tree Blob (OpenFirmware, u-boot, …)
  	// Python *.pyc bytecode magic numbers (defined in importlib/_bootstrap_external.py)
  	("Python pyc 2.7", [0x03, 0xF3, '\r', '\n']), // (62211i little-endian)
  	("Python pyc 3.8", [0x55, 0x0D, '\r', '\n']), // (3413i little-endian)
  	("Python pyc 3.9", [0x61, 0x0D, '\r', '\n']), // (3425i little-endian)
  	("Python pyc 3.10", [0x6F, 0x0D, '\r', '\n']), // (3439i little-endian)
  	("Python pyc 3.11", [0xA7, 0x0D, '\r', '\n']), // (3495i little-endian)
  	("Python pyc 3.12", [0xCB, 0x0D, '\r', '\n']), // (3531i little-endian)
  	// Python pickle object data; like Perl Storable, loading it is dangerous enough to cause code execution
  	("Python Pickle v2", [0x80, 0x02]), // PROTO opcode + protocol 2 (no FRAME opcode in this sequence)
  	("Python Pickle v3", [0x80, 0x03]), // PROTO opcode + protocol 3 (no FRAME opcode in this sequence)
  	("Python Pickle v4", [0x80, 0x04, 0x95]), // PROTO opcode + protocol 4 + start of frame (FRAME opcode)
  	("Python Pickle v5", [0x80, 0x05, 0x95]), // PROTO opcode + protocol 5 + start of frame (FRAME opcode)
  	// https://github.com/MoarVM/MoarVM/blob/master/docs/bytecode.markdown
  	("MoarVM bytecode", ['M', 'O', 'A', 'R', 'V', 'M', '\r', '\n']),
  	// https://github.com/parrot/parrot/blob/master/docs/parrotbyte.pod
  	("Parrot bytecode", [0xFE, 'P', 'B', 'C', '\r', '\n', 0x1A, '\n']),
  	("Perl storable v0.6", ['p', 'e', 'r', 'l', '-', 's', 't', 'o', 'r', 'e']),
  	("Perl storable v0.7", ['p', 's', 't', '0']),
  	("Chez Scheme bytecode", [0x00, 0x00, 0x00, 0x00, 'c', 'h', 'e', 'z']),
  	("NekoVM bytecode", ['N', 'E', 'K', 'O']),
  	("Emacs Lisp bytecode", [';', 'E', 'L', 'C']), // Emacs Lisp bytecode; if false positives turn up, the next 4 bytes are the version
  	("OCaml", ['C', 'a', 'm', 'l', '1', '9', '9', '9']),
  	("Ren'Py Archive v1", [0x78, 0x9c]),
  	("Ren'Py Archive v2", ['R', 'P', 'A', '-', '2', '.', '0', ' ']),
  	("Ren'Py Archive v3", ['R', 'P', 'A', '-', '3', '.', '0', ' ']),
  	("Squirrel bytecode", [0xFA, 0xFA]),
  	("Clang Pre-Compiled-Header", ['C', 'P', 'C', 'H']),
  	// Clang Pre-Compiled-Header, followed by Info Block, see:
  	// - clang/lib/Serialization/ASTWriter.cpp ASTWriter::WriteAST
  	// - clang/lib/Serialization/ASTReader.cpp doesntStartWithASTFileMagic
  	// Excluded from fixtures (200KB+), test with:
  	// echo > empty.h && clang -cc1 -nobuiltininc -emit-pch -o empty.h.pch empty.h
  	("GCC Pre-Compiled-Header", ['g', 'p', 'c', 'h']),
  	// Excluded from fixtures (1.2MB+), test with:
  	// echo > empty.h && gcc empty.h
  	("GCC Rust Metadata", ['G', 'R', 'S', 'T']), // GCC Rust Metadata (*.rox)
  	("Dart Kernel snapshot", [0x90, 0xab, 0xcd, 0xef]),
  	// Why are these 2 different, and is the C one still in use?
  	// No clue, but both are in the SDK repo.
  	("Dart JIT snapshot (C code)", [0xdc, 0xdc, 0xf5, 0xf5]), // Dart JIT snapshot, if done from the C code.
  	("Dart JIT snapshot (Dart code)", [0xdc, 0xdc, 0xf6, 0xf6]), // Dart JIT snapshot, if done from the Dart code.
  ];
  // Signatures used by the multi-step checks in id_blob(), which need more than
  // a plain prefix comparison.
  // First two bytes of a file that is then probed for a PE header.
  const dos_magic: []u8 = ['M', 'Z'];
  // Compared at the little-endian 32-bit offset stored in bytes 60..64.
  const pe_magic: []u8 = ['P', 'E', 0x00, 0x00];
  // Virtual machine name expected in a "#~" Racket bytecode header.
  const racket: []u8 = ['r', 'a', 'c', 'k', 'e', 't'];
  // ZIP signature; checked at offset 0 and after the comments of an escript.
  const zip: []u8 = ['P', 'K', 0x03, 0x04];
  // Compared against bytes 0x27..0x2B of a ZIP file to report "Java JAR".
  // NOTE(review): presumably the 0xCAFE extra-field header written after a
  // "META-INF/" first entry name — confirm.
  const jar: []u8 = [0xFE, 0xCA, 0, 0];
  // Prefix that triggers the escript scan in id_blob().
  const shebang: []u8 = ['#', '!'];
  // Set by check_dir() once a non-excluded blob is seen; read by main() for -c.
  let found: bool = false;
  // Destination of append_action(); main() assigns it when -j is given.
  let json_out: io::handle = 0;
  // Identifies whether the file at filename is a known binary/bytecode format.
  //
  // Reads up to the first 4096 bytes and returns the format name as a str when
  // one of the signatures matches, or void for an empty or unrecognised file.
  // Failures from os::open and io::read are returned as fs::error / io::error.
  //
  // Every check runs against the bytes actually read for this file. The buffer
  // is static and reused between calls, so looking past the read length would
  // match leftovers from a previously scanned file.
  fn id_blob(filename: str) (void | str | fs::error | io::error) = {
  	static let buffer: [4096]u8 = [0...];
  	const file = os::open(filename)?;
  	defer io::close(file)!;

  	// Fill the buffer until it is full or the file ends; a single read is
  	// allowed to return fewer bytes than requested.
  	let nread = 0z;
  	for (nread < len(buffer)) {
  		match (io::read(file, buffer[nread..])?) {
  		case io::EOF =>
  			break;
  		case let n: size =>
  			if (n == 0) {
  				break;
  			};
  			nread += n;
  		};
  	};
  	if (nread == 0) {
  		// empty file
  		return void;
  	};
  	const data = buffer[..nread];

  	// Plain prefix signatures; first match wins.
  	for (let i = 0z; i < len(magic); i += 1) {
  		assert(len(magic[i].1) > 0);
  		if (bytes::hasprefix(data, magic[i].1)) {
  			return magic[i].0;
  		};
  	};

  	// Special check to detect *all* Microsoft Portable Executable files:
  	// bytes 60..64 hold the little-endian offset of the "PE\0\0" header.
  	if (bytes::hasprefix(data, dos_magic) && len(data) >= 64) {
  		const pe_offset = endian::legetu32(data[60..64]): size;
  		if (pe_offset <= len(data)
  				&& bytes::hasprefix(data[pe_offset..], pe_magic)) {
  			return "WinNT EXE";
  		};
  	};

  	// detect binary escripts (PKZIP archive and BEAM supported)
  	if (bytes::hasprefix(data, shebang)) {
  		// The "#!" line and every following line starting with '%' are
  		// skipped; the first byte after them decides.
  		let comment = true;
  		for (let i = 0z; i < len(data); i += 1) {
  			if (comment) {
  				if (data[i] == '\n') comment = false;
  				continue;
  			};
  			if (data[i] == '%') {
  				comment = true;
  				continue;
  			};
  			// First bytes after comments. hasprefix is false when fewer
  			// bytes remain than the signature needs, so a payload that
  			// starts near the end of the buffer cannot slice out of bounds.
  			if (bytes::hasprefix(data[i..], zip)) return "Erlang ZIP BEAM";
  			if (bytes::hasprefix(data[i..], beam)) return "Erlang #! BEAM";
  			// source code as script
  			break;
  		};
  	};

  	// Special check to detect racket bytecode
  	if (bytes::hasprefix(data, ['#', '~']) && len(data) > 2) {
  		// From src/expander/compile/write-linklet.rkt in racket:
  		// - #~
  		// - length-prefixed version string (ie. "\x038.5")
  		// - length-prefixed virtual machine string (ie. "\x06racket")
  		// - 'D' / 'B'
  		// Here it verifies that the virtual machine string is racket, assuming none other is supported in the wild.
  		// Racket itself only matches against '#~' which is small & only printable-ASCII, so too prone to false positives
  		//
  		// Offsets are computed as size rather than u8 so that large length
  		// bytes cannot wrap around and produce an invalid slice.
  		const version_len = data[2]: size;
  		const vm_len_at = 3 + version_len;
  		if (vm_len_at < len(data)) {
  			const vm_len = data[vm_len_at]: size;
  			if (vm_len == len(racket)
  					&& bytes::hasprefix(data[vm_len_at + 1..], racket)) {
  				return "Racket";
  			};
  		};
  	};

  	// ZIP archive carrying the jar marker at bytes 0x27..0x2B.
  	if (bytes::hasprefix(data, zip) && len(data) >= 0x2B) {
  		if (bytes::equal(jar, data[0x27..0x2B])) {
  			return "Java JAR";
  		};
  	};
  	return void;
  };
  // Fixture-driven test for id_blob(): plain source files must not be flagged,
  // and every known blob must be reported under its expected format name.
  @test fn id_blob() void = {
  	const plain_files = [
  		"test/fixtures/empty",
  		"test/fixtures/empty.dts",
  		"test/fixtures/hello-dart.dart",
  		"test/fixtures/hello-ocaml.ml",
  		"test/fixtures/hello-racket.rkt",
  		"test/fixtures/hello.1",
  		"test/fixtures/hello.c",
  		"test/fixtures/hello.cs",
  		"test/fixtures/hello.el",
  		"test/fixtures/hello.erl",
  		"test/fixtures/hello.erl.escript",
  		"test/fixtures/hello.java",
  		"test/fixtures/hello.lua",
  		"test/fixtures/hello.neko",
  		"test/fixtures/hello.nqp",
  		"test/fixtures/hello.nut",
  		"test/fixtures/hello.pir",
  		"test/fixtures/hello.py",
  		"test/fixtures/hello.wat",
  		"test/fixtures/perl_storage.pm",
  	];
  	// Any str result here is a false positive.
  	for (let source .. plain_files) {
  		const verdict = id_blob(source)!;
  		if (verdict is str) {
  			fmt::fatalf(
  				"deblob: error: id_blob({}) got wrongly detected as: {}",
  				source, verdict as str
  			);
  		};
  	};

  	// (expected format name, fixture path)
  	const expected = [
  		("Racket", "test/fixtures/compiled/hello-racket_rkt.zo"),
  		("DTB", "test/fixtures/empty.dtb"),
  		("ELF", "test/fixtures/hello"),
  		("Dart Kernel snapshot", "test/fixtures/hello-dart.dill"),
  		("Dart JIT snapshot (Dart code)", "test/fixtures/hello-dart.jit"),
  		("Unix ar(1)", "test/fixtures/hello-ocaml.a"),
  		("OCaml", "test/fixtures/hello-ocaml.cma"),
  		("OCaml", "test/fixtures/hello-ocaml.cmi"),
  		("OCaml", "test/fixtures/hello-ocaml.cmo"),
  		("OCaml", "test/fixtures/hello-ocaml.cmx"),
  		("OCaml", "test/fixtures/hello-ocaml.cmxa"),
  		("ELF", "test/fixtures/hello-ocaml.o"),
  		("Unix ar(1)", "test/fixtures/hello.a"),
  		("Erlang FOR1 BEAM", "test/fixtures/hello.beam"),
  		("Erlang #! BEAM", "test/fixtures/hello.beam.escript"),
  		("Java .class / Mach-O exec", "test/fixtures/hello.class"),
  		("Squirrel bytecode", "test/fixtures/hello.cnut"),
  		("Emacs Lisp bytecode", "test/fixtures/hello.elc"),
  		("WinNT EXE", "test/fixtures/hello.exe"),
  		("Java JAR", "test/fixtures/hello.jar"),
  		("Lua bytecode", "test/fixtures/hello.luac53"),
  		("Lua bytecode", "test/fixtures/hello.luac54"),
  		("NekoVM bytecode", "test/fixtures/hello.n"),
  		("MoarVM bytecode", "test/fixtures/hello.nqp.moarvm"),
  		("ELF", "test/fixtures/hello.o"),
  		("Parrot bytecode", "test/fixtures/hello.pir.pbc"),
  		("Wasm", "test/fixtures/hello.wasm"),
  		("WinNT EXE", "test/fixtures/monodx.dll"),
  		("Perl storable v0.7", "test/fixtures/perl_storage.pst"),
  		("Python Pickle v4", "test/fixtures/pickle/hello.4.pickle"),
  		("Python Pickle v5", "test/fixtures/pickle/hello.5.pickle"),
  		("Apple PEF", "test/fixtures/qemu_vga.ndrv"),
  		//("", "test/fixtures/option.rom"),
  		("Mach-O exec", "test/fixtures/macos-arm64.o"),
  	];
  	for (let entry .. expected) {
  		const want = entry.0;
  		const fixture = entry.1;
  		match (id_blob(fixture)!) {
  		case let got: str =>
  			if (got != want) {
  				fmt::fatalf(
  					"deblob: error: id_blob({}) got identified as \"{}\" instead of \"{}\"",
  					fixture, got, want
  				);
  			};
  		case void =>
  			fmt::fatalf(
  				"deblob: error: id_blob({}) didn't got detected as: {}",
  				fixture, want
  			);
  		};
  	};
  };
  // Reports whether filename matches at least one of the patterns stored in
  // excludes, using fnmatch with no flags.
  fn is_excluded(filename: str) bool = {
  	for (let pattern .. excludes) {
  		if (fnmatch::fnmatch(pattern, filename, fnmatch::flag::NONE)) {
  			return true;
  		};
  	};
  	return false;
  };
  // Writes one JSON object with the keys "action", "path" and "format" to
  // json_out. Does nothing unless JSON output was enabled.
  //
  // The first record is written without a leading comma and every later one
  // with it, so the records form the elements of the array that main() opens
  // and closes around the scan.
  fn append_action(action: str, filename: str, format: str) void = {
  	if (!json) {
  		return;
  	};

  	let record = json::object { ... };
  	json::put(&record, "action", action);
  	defer json::take(&record, "action");
  	json::put(&record, "path", filename);
  	defer json::take(&record, "path");
  	json::put(&record, "format", format);
  	defer json::take(&record, "format");

  	let serialized = json::dumpstr(record);
  	defer free(serialized);

  	// Remembers across calls whether a record has already been written.
  	static let first_obj: bool = true;
  	const template = if (first_obj) "\n\t{}" else ",\n\t{}";
  	fmt::fprintf(json_out, template, serialized)!;
  	first_obj = false;
  };
  // Recursively walks dirname and handles every regular file that id_blob()
  // recognises:
  // - files matching an exclude pattern are logged as "ignoring" and kept;
  // - otherwise found is set, and the file is logged as "detected" (with -n)
  //   or logged as "removing" and deleted.
  // Entries that are neither directories nor regular files are skipped.
  //
  // Returns errors::invalid when the directory cannot be opened or when
  // iterating over it fails part-way, so that an incomplete scan is never
  // reported as success. Failures to open, read or remove an individual file
  // are printed and the walk continues.
  fn check_dir(dirname: str) (void | errors::invalid | io::error) = {
  	const iter = match (os::iter(dirname)) {
  	case let iter: *fs::iterator =>
  		yield iter;
  	case let err: fs::error =>
  		fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(err))!;
  		return errors::invalid;
  	};
  	defer fs::finish(iter);

  	for (true) {
  		const ent: fs::dirent = match (fs::next(iter)) {
  		case let ent: fs::dirent =>
  			yield ent;
  		case let e: fs::error =>
  			fmt::errorfln("deblob: error: Failed walking directory '{}': {}", dirname, fs::strerror(e))?;
  			// The remaining entries were not examined: report failure,
  			// same as when the directory could not be opened at all.
  			return errors::invalid;
  		case done =>
  			break;
  		};

  		const filename_path = path::init(dirname, ent.name)!;
  		const filename = path::string(&filename_path);

  		if (fs::isdir(ent.ftype)) {
  			check_dir(filename)?;
  			continue;
  		};
  		if (!fs::isfile(ent.ftype)) {
  			// ignore non-(dir/regular-file) like symlinks, blocks, fifo, …
  			continue;
  		};

  		const blob_id = match (id_blob(filename)) {
  		case void =>
  			continue;
  		case let s: str =>
  			yield s;
  		case let err: fs::error =>
  			fmt::errorfln("deblob: error: Failed opening {}: {}",
  				filename, fs::strerror(err))!;
  			continue;
  		case let err: io::error =>
  			fmt::errorfln("deblob: error: Failed reading {}: {}",
  				filename, io::strerror(err))!;
  			continue;
  		};

  		if (is_excluded(filename)) {
  			append_action("ignoring", filename, blob_id);
  			fmt::printfln("ignoring {}:\t{}", blob_id, filename)!;
  			continue;
  		};

  		found = true;
  		if (noop) {
  			append_action("detected", filename, blob_id);
  			fmt::printfln("detected {}:\t{}", blob_id, filename)!;
  			continue;
  		};

  		append_action("removing", filename, blob_id);
  		fmt::printfln("removing {}:\t{}", blob_id, filename)!;
  		match (os::remove(filename)) {
  		case void =>
  			continue;
  		case let e: fs::error =>
  			fmt::errorfln("deblob: error: Failed removing file '{}':\t{}",
  				filename, fs::strerror(e))!;
  		};
  	};
  };
  // Runs check_dir() over the fixture directory and checks the number of
  // top-level entries before (61) and after (30) the run.
  @test fn check_dir() void = {
  	const fixtures = "test/check_dir-fixtures";

  	const before = match (os::readdir(fixtures)) {
  	case let err: fs::error =>
  		fmt::fatalf("deblob: error: os::readdir({}): {}", fixtures, fs::strerror(err));
  	case let entries: []fs::dirent =>
  		yield entries;
  	};
  	assert(len(before) == 61);

  	const outcome = check_dir(fixtures);
  	assert(outcome is void);

  	const after = match (os::readdir(fixtures)) {
  	case let err: fs::error =>
  		fmt::fatalf("deblob: error: os::readdir({}): {}", fixtures, fs::strerror(err));
  	case let entries: []fs::dirent =>
  		yield entries;
  	};
  	assert(len(after) == 30);
  };
  // Entry point: parses the command line, optionally opens the JSON report and
  // changes directory, scans "." with check_dir(), then exits with:
  // - 0 when the scan completed (and, with -c, no non-excluded blob was found),
  // - 2 when -c was given and a non-excluded blob was found,
  // - 1 when the scan failed.
  export fn main() void = {
  	const cmd = getopt::parse(os::args,
  		"Remove binary executable files",
  		('c', "Return error if any non-excluded blobs were found"),
  		('e', "NAME", "Exclude filename from removal (defaults to none)"),
  		('d', "PATH", "Set working directory (default to current dir)"),
  		('j', "PATH", "JSON output file"),
  		('n', "No actual removal, only scan and log"),
  	);
  	defer getopt::finish(&cmd);
  	defer free(excludes);

  	let opt_d = "";
  	let json_out_path = "";
  	for (let i = 0z; i < len(cmd.opts); i += 1) {
  		const opt = cmd.opts[i];
  		switch (opt.0) {
  		case 'c' =>
  			check = true;
  		case 'e' =>
  			append(excludes, opt.1);
  		case 'd' =>
  			opt_d = opt.1;
  		case 'n' =>
  			noop = true;
  		case 'j' =>
  			json = true;
  			json_out_path = opt.1;
  		case =>
  			fmt::fatalf("deblob: error: Unhandled option -{}", opt.0);
  		};
  	};

  	// Keyed on the same flag append_action() tests, so that records are
  	// never written unless the output file was opened here. An empty -j
  	// path therefore fails at os::create instead of leaving json_out at
  	// its initial value.
  	if (json) {
  		json_out = match (os::create(json_out_path, fs::mode::USER_RW | fs::mode::GROUP_R | fs::mode::OTHER_R)) {
  		case let f: io::file =>
  			yield f;
  		case let e: fs::error =>
  			fmt::fatalf("deblob: error: Failed creating/opening file '{}' for JSON output: {}", json_out_path, fs::strerror(e));
  		};
  		// Opens the JSON array that append_action() fills.
  		fmt::fprint(json_out, "[")!;
  	};

  	// The JSON file is created before this point, so a relative -j path is
  	// resolved against the directory the program was started in.
  	if (opt_d != "") {
  		match (os::chdir(opt_d)) {
  		case let e: fs::error =>
  			fmt::fatalf("deblob: error: Failed changing current directory to '{}': {}", opt_d, fs::strerror(e));
  		case void =>
  			void;
  		};
  	};

  	fmt::println(":: Checking for blobs")!;
  	const ret = check_dir(".");
  	fmt::println(":: Done checking for blobs")!;

  	if (json) {
  		// Closes the JSON array even when the scan failed.
  		fmt::fprint(json_out, "\n]")!;
  		match (io::close(json_out)) {
  		case void =>
  			void;
  		case let e: io::error =>
  			fmt::fatalf("deblob: error: Failed closing JSON output file '{}': {}", json_out_path, io::strerror(e));
  		};
  	};

  	match (ret) {
  	case void =>
  		if (check && found) os::exit(2);
  		os::exit(0);
  	case errors::invalid =>
  		os::exit(1);
  	case let e: io::error =>
  		fmt::errorfln("deblob: error: I/O error while traversing directories: {}", io::strerror(e))!;
  		os::exit(1);
  	};
  };
  420. };