unicode.ha (3769B)
- // Collection of utilities inspired by Plan9
- // SPDX-FileCopyrightText: 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
- // SPDX-License-Identifier: MPL-2.0
- use fmt;
- use getopt;
- use io;
- use os;
- use strconv;
- use strings;
- use unix::tty;
// Set by -n: main() treats the arguments as characters and prints their
// codepoints (decode) instead of turning codepoints into characters (encode).
let numeric: bool = false;

// Set by -t: encode() writes the characters back to back instead of one per
// line.
let text: bool = false;
// Returns the rune to display for codepoint c. Surrogates (D800-DFFF) are not
// Unicode scalar values and have no UTF-8 encoding, so U+FFFD REPLACEMENT
// CHARACTER stands in for them.
fn cell_rune(c: u32) rune = {
	return if (c >= 0xD800 && c <= 0xDFFF) '\uFFFD' else c: rune;
};

// Prints a table of "codepoint character" cells for every "hexmin-hexmax"
// argument; both bounds are hexadecimal and inclusive.
//
// When stdout is a terminal whose size can be queried, rows are wrapped so
// they fit its width; otherwise each range is printed on a single line.
// A bound that is not hexadecimal, overflows u32, lies beyond U+10FFFF, or a
// range where hexmin >= hexmax terminates the program with an error message.
fn table(args: []str) void = {
	// void means "do not wrap": stdout is not a terminal, or its size could
	// not be obtained (a warning is printed in that case).
	const tsize: (tty::ttysize | void) = if (tty::isatty(os::stdout_file)) {
		yield match (tty::winsize(os::stdout_file)) {
		case let s: tty::ttysize =>
			yield s;
		case let e: tty::error =>
			fmt::errorfln("unicode: Warning: Can't get current terminal size: {}", tty::strerror(e))!;
			yield void;
		};
	} else {
		yield void;
	};

	for (let i = 0z; i < len(args); i += 1) {
		const arg = args[i];
		const (lo_str, hi_str) = strings::cut(arg, "-");

		const min = match (strconv::stou32(lo_str, strconv::base::HEX)) {
		case let u: u32 =>
			yield u;
		case strconv::overflow =>
			fmt::fatalf("unicode: Integer overflow with hexmin argument: {}", arg);
		case strconv::invalid =>
			fmt::fatalf("unicode: Expected hexadecimal for hexmin, got: {}", arg);
		};
		const max = match (strconv::stou32(hi_str, strconv::base::HEX)) {
		case let u: u32 =>
			yield u;
		case strconv::overflow =>
			fmt::fatalf("unicode: Integer overflow with hexmax argument: {}", arg);
		case strconv::invalid =>
			fmt::fatalf("unicode: Expected hexadecimal for hexmax, got: {}", arg);
		};

		if (min >= max) {
			fmt::fatalf("unicode: Error min({:x}) >= max({:x})", min, max);
		};
		// Nothing past U+10FFFF is a Unicode codepoint. Bounding max also
		// keeps the loops below finite: with max == 0xFFFFFFFF the
		// condition `c <= max` could never become false for a u32.
		if (max > 0x10FFFF) {
			fmt::fatalf("unicode: hexmax is beyond the last Unicode codepoint (10FFFF): {}", arg);
		};

		// NOTE(review): the cell descriptions below call for a non-breaking
		// space after the digits and two trailing spaces; confirm that the
		// literals still contain them, such whitespace is easily lost when
		// the file is copied around.
		// 0xFFFF itself still fits in four hex digits, hence `<=`.
		const (cel_width, cel_fmt) = if (max <= 0xFFFF) {
			// (4 hex, non-break space, 1 char, 2 space) = 8 chars
			yield (8u16, "{:.4X} {} ");
		} else {
			// (8 hex, non-break space, 1 char, 2 space) = 12 chars
			yield (12u16, "{:.8X} {} ");
		};

		match (tsize) {
		case void =>
			for (let c = min; c <= max; c += 1) {
				fmt::printf(cel_fmt, c, cell_rune(c))!;
			};
			fmt::println()!;
		case let ts: tty::ttysize =>
			// Number of cells per row. A terminal narrower than one cell
			// (or one reporting 0 columns) would give 0 here and the row
			// loop would never advance, so always print at least one cell.
			let row_width = ts.columns / cel_width;
			if (row_width == 0) {
				row_width = 1;
			};
			for (let r = min; r <= max; r += row_width) {
				for (let c = r; c < r + row_width && c <= max; c += 1) {
					fmt::printf(cel_fmt, c, cell_rune(c))!;
				};
				fmt::println()!;
			};
		};
	};
};
// Characters to codepoints: prints the codepoint of every character of every
// argument, one codepoint per line.
fn decode(args: []str) void = {
	for (let i = 0z; i < len(args); i += 1) {
		const runes = strings::torunes(args[i]);
		// torunes returns a slice the caller owns; release it once this
		// argument has been printed instead of leaking one slice per
		// argument.
		defer free(runes);

		for (let ri = 0z; ri < len(runes); ri += 1) {
			const r = runes[ri]: u32;
			// Print as 4 characters min to match UnicodeData.txt formatting
			fmt::printfln("{:.4X}", r)!;
		};
	};
};
// Codepoints to characters: parses every argument as a hexadecimal codepoint
// and prints the corresponding character.
//
// Characters are printed one per line, or back to back when the global `text`
// flag is set. An argument that is not hexadecimal, overflows u32, or does not
// name a Unicode scalar value terminates the program with an error message.
fn encode(args: []str) void = {
	for (let i = 0z; i < len(args); i += 1) {
		const arg = args[i];
		const cp = match (strconv::stou32(arg, strconv::base::HEX)) {
		case let u: u32 =>
			yield u;
		case strconv::overflow =>
			fmt::fatalf("unicode: Integer overflow with codepoints argument: {}", arg);
		case strconv::invalid =>
			fmt::fatalf("unicode: Encoding failed, expected hexadecimal, got: {}", arg);
		};

		// Surrogates (D800-DFFF) and anything past 10FFFF are not Unicode
		// scalar values and have no UTF-8 encoding, so refuse them rather
		// than casting them to a rune.
		if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
			fmt::fatalf("unicode: Encoding failed, not a valid Unicode codepoint: {}", arg);
		};

		const r = cp: rune;
		if (text) {
			fmt::print(r)!;
		} else {
			fmt::println(r)!;
		};
	};
};
// Entry point: parses the -n and -t flags, then picks the mode from the
// arguments:
//   - first argument contains '-': print codepoint tables (table);
//   - otherwise, with -n:          characters to codepoints (decode);
//   - otherwise:                   codepoints to characters (encode).
// Without any argument the usage text is written to stderr and the program
// exits with status 1.
export fn main() void = {
	const cmd = getopt::parse(os::args,
		"Encode/Decode unicode characters",
		('n', "Numeric output"),
		('t', "Running text. Newlines aren't inserted after each character."),
		"hexmin-hexmax ... | characters ... | codepoints ...",
	);
	defer getopt::finish(&cmd);

	for (let i = 0z; i < len(cmd.opts); i += 1) {
		const opt = cmd.opts[i];
		switch (opt.0) {
		case 'n' =>
			numeric = true;
		case 't' =>
			text = true;
		case =>
			abort("unhandled option");
		};
	};

	if (len(cmd.args) == 0) {
		getopt::printusage(os::stderr, "unicode", cmd.help)!;
		// Missing arguments are a usage error: report failure to the
		// caller instead of returning with a success status.
		os::exit(1);
	};

	if (strings::contains(cmd.args[0], '-')) {
		table(cmd.args);
	} else if (numeric) {
		decode(cmd.args);
	} else {
		encode(cmd.args);
	};
};