logo

9utils

Collection of utilities inspired by Plan9

unicode.ha (3772B)


  1. // Collection of utilities inspired by Plan9
  2. // SPDX-FileCopyrightText: 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. use fmt;
  5. use getopt;
  6. use io;
  7. use os;
  8. use strconv;
  9. use strings;
  10. use unix::tty;
// Set to true by the -n option in main; when set (and the first argument is
// not a range), main calls decode instead of encode.
let numeric = false;
// Set to true by the -t option in main; when set, encode prints characters
// back to back instead of one per line.
let text = false;
  13. // print table from hexmin-hexmax
  14. fn table(args: []str) void = {
  15. const tsize = if(tty::isatty(os::stdout_file)) {
  16. yield match(tty::winsize(os::stdout_file)) {
  17. case let s: tty::ttysize =>
  18. yield s;
  19. case let e: tty::error =>
  20. fmt::errorfln("unicode: Warning: Can't get current terminal size: {}", tty::strerror(e))!;
  21. };
  22. } else {
  23. yield void;
  24. };
  25. for(let i = 0z; i < len(args); i += 1) {
  26. const arg = args[i];
  27. let (min, max) = strings::cut(arg, "-");
  28. let min = match(strconv::stou32b(min, strconv::base::HEX)) {
  29. case let u: u32 => yield u;
  30. case strconv::overflow =>
  31. fmt::fatalf("unicode: Integer overflow with hexmin argument: {}", arg);
  32. case strconv::invalid =>
  33. fmt::fatalf("unicode: Expected hexadecimal for hexmin, got: {}", arg);
  34. };
  35. let max = match(strconv::stou32b(max, strconv::base::HEX)) {
  36. case let u: u32 => yield u;
  37. case strconv::overflow =>
  38. fmt::fatalf("unicode: Integer overflow with hexmax argument: {}", arg);
  39. case strconv::invalid =>
  40. fmt::fatalf("unicode: Expected hexadecimal for hexmax, got: {}", arg);
  41. };
  42. if(min >= max) {
  43. fmt::fatalf("unicode: Error min({:x}) >= max({:x})", min, max);
  44. };
  45. const (cel_width, cel_fmt) = if(max < 0xFFFF) {
  46. // (4 hex, non-break space, 1 char, 2 space) = 8 chars
  47. yield (8u16, "{:.4X} {} ");
  48. } else {
  49. // (8 hex, non-break space, 1 char, 2 space) = 12 chars
  50. yield (12u16, "{:.8X} {} ");
  51. };
  52. match(tsize) {
  53. case void =>
  54. for(let c = min; c <= max; c += 1) {
  55. fmt::printf(cel_fmt, c, c: rune)!;
  56. };
  57. fmt::println()!;
  58. case let ts: tty::ttysize =>
  59. const row_width = ts.columns / cel_width;
  60. for(let r = min; r <= max; r += row_width) {
  61. for(let c = r; c < r+row_width && c <= max; c += 1) {
  62. fmt::printf(cel_fmt, c, c: rune)!;
  63. };
  64. fmt::println()!;
  65. };
  66. };
  67. };
  68. };
  69. // characters to codepoints
  70. fn decode(args: []str) void = {
  71. for(let i = 0z; i < len(args); i += 1) {
  72. const arg = args[i];
  73. const runes = strings::torunes(arg);
  74. for(let ri = 0z; ri < len(runes); ri += 1) {
  75. const r = runes[ri]: u32;
  76. // Print as 4 characters min to match UnicodeData.txt formatting
  77. fmt::printfln("{:.4X}", r)!;
  78. };
  79. };
  80. };
  81. // codepoints to characters
  82. fn encode(args: []str) void = {
  83. let chars: []rune = [];
  84. for(let i = 0z; i < len(args); i += 1) {
  85. const arg = args[i];
  86. const r = match(strconv::stou32b(arg, strconv::base::HEX)) {
  87. case let u: u32 =>
  88. yield u: rune;
  89. case strconv::overflow =>
  90. fmt::fatalf("unicode: Integer overflow with codepoints argument: {}", arg);
  91. case strconv::invalid =>
  92. fmt::fatalf("unicode: Encoding failed, expected hexadecimal, got: {}", arg);
  93. };
  94. if(text) {
  95. fmt::print(r)!;
  96. } else {
  97. fmt::println(r)!;
  98. };
  99. };
  100. };
  101. export fn main() void = {
  102. const cmd = getopt::parse(os::args,
  103. "Encode/Decode unicode characters",
  104. ('n', "Numeric output"),
  105. ('t', "Running text. Newlines aren't inserted after each character."),
  106. "hexmin-hexmax ... | characters ... | codepoints ...",
  107. );
  108. defer getopt::finish(&cmd);
  109. for (let i = 0z; i < len(cmd.opts); i += 1) {
  110. const opt = cmd.opts[i];
  111. switch(opt.0) {
  112. case 'n' => numeric = true;
  113. case 't' => text = true;
  114. case => abort("unhandled option");
  115. };
  116. };
  117. if(len(cmd.args) == 0) {
  118. getopt::printusage(os::stderr, "unicode", cmd.help)!;
  119. return;
  120. };
  121. if(strings::contains(cmd.args[0], '-')) {
  122. table(cmd.args);
  123. } else {
  124. if(numeric) {
  125. decode(cmd.args);
  126. } else {
  127. encode(cmd.args);
  128. };
  129. };
  130. };