logo

9utils

Collection of utilities inspired by Plan 9.
Clone with: git clone https://anongit.hacktivis.me/git/9utils.git/

unicode.ha (3911B)


  1. // Collection of utilities inspired by Plan9
  2. // SPDX-FileCopyrightText: 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. use fmt;
  5. use getopt;
  6. use io;
  7. use os;
  8. use strconv;
  9. use strings;
  10. use unix::tty;
  // Set by the -n option ("Numeric output"): main() then hands the arguments
  // to decode() so that characters are printed as codepoints.
  let numeric: bool = false;

  // Set by the -t option ("Running text"): encode() then prints characters
  // back to back instead of one per line.
  let text: bool = false;
  // Returns the rune to display in a table cell for codepoint c.
  // Surrogates (D800-DFFF) are not Unicode scalar values and cannot be encoded
  // as UTF-8, so U+FFFD REPLACEMENT CHARACTER is shown in their place.
  fn table_glyph(c: u32) rune = {
  	if (c >= 0xD800 && c <= 0xDFFF) {
  		return '\uFFFD';
  	};
  	return c: rune;
  };

  // print table from hexmin-hexmax
  //
  // Every element of args must look like "hexmin-hexmax" (both hexadecimal,
  // hexmin < hexmax <= 10FFFF); anything else terminates the program with an
  // error message. Each range is printed as "CODEPOINT CHARACTER" cells. When
  // stdout is a terminal whose size can be queried, cells are wrapped into rows
  // that fit its width; otherwise the whole range goes on a single line.
  fn table(args: []str) void = {
  	// void means "no usable terminal width": either stdout is not a tty or
  	// querying its size failed (a warning is printed in that case).
  	const tsize: (tty::ttysize | void) = if (tty::isatty(os::stdout_file)) {
  		yield match (tty::winsize(os::stdout_file)) {
  		case let s: tty::ttysize =>
  			yield s;
  		case let e: tty::error =>
  			fmt::errorfln("unicode: Warning: Can't get current terminal size: {}", tty::strerror(e))!;
  			yield void;
  		};
  	} else {
  		yield void;
  	};

  	for (let arg .. args) {
  		// Without a "-" in arg the second half is empty and is rejected
  		// below as invalid hexadecimal.
  		const (lo, hi) = strings::cut(arg, "-");

  		const min = match (strconv::stou32(lo, strconv::base::HEX)) {
  		case let u: u32 =>
  			yield u;
  		case strconv::overflow =>
  			fmt::fatalf("unicode: Integer overflow with hexmin argument: {}", arg);
  		case strconv::invalid =>
  			fmt::fatalf("unicode: Expected hexadecimal for hexmin, got: {}", arg);
  		};
  		const max = match (strconv::stou32(hi, strconv::base::HEX)) {
  		case let u: u32 =>
  			yield u;
  		case strconv::overflow =>
  			fmt::fatalf("unicode: Integer overflow with hexmax argument: {}", arg);
  		case strconv::invalid =>
  			fmt::fatalf("unicode: Expected hexadecimal for hexmax, got: {}", arg);
  		};

  		if (min >= max) {
  			fmt::fatalf("unicode: Error min({:x}) >= max({:x})", min, max);
  		};
  		// Nothing above 10FFFF is a codepoint; rejecting it here also
  		// guarantees the u32 loop counters below cannot wrap around.
  		if (max > 0x10FFFF) {
  			fmt::fatalf("unicode: Error max({:x}) is beyond the last codepoint (10ffff)", max);
  		};

  		// FFFF itself still fits in 4 hex digits, hence <= rather than <.
  		// The whitespace is spelled with escapes so that the literal
  		// provably matches the advertised cell width.
  		const (cel_width, cel_fmt) = if (max <= 0xFFFF) {
  			// (4 hex, non-break space, 1 char, 2 space) = 8 chars
  			yield (8u16, "{:.4X}\u00A0{}  ");
  		} else {
  			// (8 hex, non-break space, 1 char, 2 space) = 12 chars
  			yield (12u16, "{:.8X}\u00A0{}  ");
  		};

  		match (tsize) {
  		case void =>
  			for (let c = min; c <= max; c += 1) {
  				fmt::printf(cel_fmt, c, table_glyph(c))!;
  			};
  			fmt::println()!;
  		case let ts: tty::ttysize =>
  			// A terminal narrower than one cell (or one reporting 0
  			// columns) would give a row width of 0 and the row loop
  			// would never advance; always print at least one cell.
  			let row_width: u32 = ts.columns / cel_width;
  			if (row_width == 0) {
  				row_width = 1;
  			};
  			for (let r = min; r <= max; r += row_width) {
  				for (let c = r; c < r + row_width && c <= max; c += 1) {
  					fmt::printf(cel_fmt, c, table_glyph(c))!;
  				};
  				fmt::println()!;
  			};
  		};
  	};
  };
  // characters to codepoints
  //
  // Prints, one per line, the hexadecimal codepoint of every character of
  // every string in args. Terminates the program if the temporary rune buffer
  // cannot be allocated.
  fn decode(args: []str) void = {
  	for (let arg .. args) {
  		const runes = match (strings::torunes(arg)) {
  		case let r: []rune =>
  			yield r;
  		case nomem =>
  			fmt::fatal("unicode: error: Out of Memory while decoding to codepoints");
  		};
  		// torunes() hands ownership of the slice to the caller; release
  		// it at the end of each iteration instead of leaking it per argument.
  		defer free(runes);

  		for (let r .. runes) {
  			// Print as 4 characters min to match UnicodeData.txt formatting
  			fmt::printfln("{:.4X}", r: u32)!;
  		};
  	};
  };
  // codepoints to characters
  //
  // Parses every element of args as a hexadecimal codepoint and prints the
  // corresponding character: one per line by default, or back to back when the
  // global "text" flag is set. Terminates the program on an argument that is
  // not hexadecimal, overflows a u32, or is not a valid codepoint.
  fn encode(args: []str) void = {
  	for (let arg .. args) {
  		const cp = match (strconv::stou32(arg, strconv::base::HEX)) {
  		case let u: u32 =>
  			yield u;
  		case strconv::overflow =>
  			fmt::fatalf("unicode: Integer overflow with codepoints argument: {}", arg);
  		case strconv::invalid =>
  			fmt::fatalf("unicode: Encoding failed, expected hexadecimal, got: {}", arg);
  		};

  		// Surrogates (D800-DFFF) and values above 10FFFF cannot be
  		// encoded as UTF-8, so refuse them before casting to rune.
  		if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
  			fmt::fatalf("unicode: Encoding failed, not a valid codepoint: {}", arg);
  		};
  		const r = cp: rune;

  		if (text) {
  			fmt::print(r)!;
  		} else {
  			fmt::println(r)!;
  		};
  	};
  };
  // Entry point: parses the -n and -t options into the "numeric" and "text"
  // globals, then dispatches on the remaining arguments:
  //   - with -n, they are characters to decode into codepoints;
  //   - otherwise, if the first one contains '-', they are hexmin-hexmax ranges;
  //   - otherwise they are codepoints to encode into characters.
  // Without any argument the usage text is written to stderr and the program
  // exits with status 1.
  export fn main() void = {
  	const cmd = getopt::parse(os::args,
  		"Encode/Decode unicode characters",
  		('n', "Numeric output"),
  		('t', "Running text. Newlines aren't inserted after each character."),
  		"hexmin-hexmax ... | characters ... | codepoints ...",
  	);
  	defer getopt::finish(&cmd);

  	for (let opt .. cmd.opts) {
  		switch (opt.0) {
  		case 'n' =>
  			numeric = true;
  		case 't' =>
  			text = true;
  		case =>
  			abort("unhandled option");
  		};
  	};

  	if (len(cmd.args) == 0) {
  		getopt::printusage(os::stderr, "unicode", cmd.help)!;
  		// Missing operands are a usage error; report failure to the caller.
  		os::exit(1);
  	};

  	// -n must win over range detection, otherwise a character argument
  	// containing '-' could never be decoded.
  	if (numeric) {
  		decode(cmd.args);
  	} else if (strings::contains(cmd.args[0], '-')) {
  		table(cmd.args);
  	} else {
  		encode(cmd.args);
  	};
  };