Collection of utilities inspired by Plan9
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Thu, 31 Aug 2023 20:36:10 +0200

Initial commit, includes cmd/unicode


If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/Makefile b/Makefile @@ -0,0 +1,35 @@ +# Collection of utilities inspired by Plan9 +# SPDX-FileCopyrightText: 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +# SPDX-License-Identifier: MPL-2.0 + +HARE ?= hare +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin/ +MANDIR ?= $(PREFIX)/share/man/ +MAN1DIR ?= $(MANDIR)/man1/ + +DESTDIR ?= + +CMDS = cmd/unicode +MANS = cmd/unicode.1 + +cmd/unicode: cmd/unicode.ha + +all: $(CMDS) + +.SUFFIXES: .ha + +.ha: + $(HARE) $(HAREFLAGS) build -o $@ $< + +.PHONY: lint +lint: + mandoc -T lint -W warning ${MANS} + reuse lint + +.PHONY: install +install: all + mkdir -p ${DESTDIR}/${BINDIR} + cp -p ${CMDS} ${DESTDIR}/${BINDIR} + mkdir -p ${DESTDIR}/${MAN1DIR} + cp ${MANS} ${DESTDIR}/${MAN1DIR} diff --git a/cmd/unicode.1 b/cmd/unicode.1 @@ -0,0 +1,54 @@ +.\" Collection of utilities inspired by Plan9 +.\" Copyright 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +.\" SPDX-License-Identifier: MPL-2.0 +.Dd 2023-08-31 +.Dt UNICODE 1 +.Os +.Sh NAME +.Nm unicode +.Nd Encode/Decode unicode characters +.Sh SYNOPSIS +.Nm +.Ar hexmin Ns - Ns Ar hexmax +.Nm +.Fl n +.Ar characters ... +.Nm +.Op Fl t +.Ar codepoints ... 
+.Sh DESCRIPTION
+.Bl -tag -width Ds
+.It Nm Ar hexmin Ns - Ns Ar hexmax
+prints a table of unicode characters,
+.Ar hexmin
+and
+.Ar hexmax
+specifies respectively the first and last characters of the table,
+each in hexadecimal.
+.It Nm Fl n Ar characters ...
+prints each character's codepoint on a separated line.
+.It Nm Oo Fl t Oc Ar codepoints ...
+encodes each given codepoint, written in hexadecimal and prints them out.
+Separated by newlines unless
+.Fl t
+is specified.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bl -tag -width Ds
+.It Ql unicode -n hello
+Print unicode values of characters in the string "hello"
+.It Ql unicode 2200-22f1
+Print a table of miscellaneous mathematical symbols.
+.It Ql unicode -t 03b3 03b5 03b9 03b1
+Prints
+.Ql γεια
+(with no newline,
+.Ql 0a
+wasn't given)
+.El
+.Sh SEE ALSO
+.Xr unicode 7
+.Sh AUTHORS
+.An Haelwenn (lanodan) Monnier Aq Mt contact+9utils@hacktivis.me
diff --git a/cmd/unicode.ha b/cmd/unicode.ha
@@ -0,0 +1,186 @@
+// Collection of utilities inspired by Plan9
+// SPDX-FileCopyrightText: 2023 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+use fmt;
+use getopt;
+use io;
+use os;
+use strconv;
+use strings;
+use unix::tty;
+
+// Set by -n: print the codepoint of each given character.
+let numeric = false;
+// Set by -t: no newline is inserted after each encoded character.
+let text = false;
+
+// Last codepoint of the Unicode codespace.
+def CODEPOINT_MAX: u32 = 0x10FFFF;
+
+// Tells whether cp can be turned into a rune: it has to be within the Unicode
+// codespace and outside of the UTF-16 surrogates range.
+fn is_scalar(cp: u32) bool = {
+	return cp <= CODEPOINT_MAX && (cp < 0xD800 || cp > 0xDFFF);
+};
+
+// Prints one table cell with cel_fmt: the character then its codepoint.
+// Surrogates can't be encoded, U+FFFD is displayed in their place.
+fn cell(cel_fmt: str, cp: u32) void = {
+	const glyph: rune = if(is_scalar(cp)) cp: rune else '\uFFFD';
+	fmt::printf(cel_fmt, glyph, cp)!;
+};
+
+// Prints a table of characters for each hexmin-hexmax range in args.
+// Rows are wrapped to the terminal width when stdout is a terminal,
+// otherwise each range is printed on a single line.
+fn table(args: []str) void = {
+	const tsize = if(tty::isatty(os::stdout_file)) {
+		yield match(tty::winsize(os::stdout_file)) {
+		case let s: tty::ttysize =>
+			yield s;
+		case let e: tty::error =>
+			// Not fatal: the table is then printed unwrapped
+			fmt::errorfln("unicode: Warning: Can't get current terminal size: {}", tty::strerror(e))!;
+		};
+	} else {
+		yield void;
+	};
+
+	for(let i = 0z; i < len(args); i += 1) {
+		const arg = args[i];
+		const (hexmin, hexmax) = strings::cut(arg, "-");
+
+		const min = match(strconv::stou32b(hexmin, strconv::base::HEX)) {
+		case let u: u32 => yield u;
+		case strconv::overflow =>
+			fmt::fatalf("unicode: Integer overflow with hexmin argument: {}", arg);
+		case strconv::invalid =>
+			fmt::fatalf("unicode: Expected hexadecimal for hexmin, got: {}", arg);
+		};
+		const max = match(strconv::stou32b(hexmax, strconv::base::HEX)) {
+		case let u: u32 => yield u;
+		case strconv::overflow =>
+			fmt::fatalf("unicode: Integer overflow with hexmax argument: {}", arg);
+		case strconv::invalid =>
+			fmt::fatalf("unicode: Expected hexadecimal for hexmax, got: {}", arg);
+		};
+
+		if(min >= max) {
+			fmt::fatalf("unicode: Error min({:x}) >= max({:x})", min, max);
+		};
+		if(max > CODEPOINT_MAX) {
+			fmt::fatalf("unicode: Error max({:x}) > last unicode codepoint({:x})", max, CODEPOINT_MAX);
+		};
+
+		const (cel_width, cel_fmt) = if(max <= 0xFFFF) {
+			// (1 char, non-break space, 4 hex, 2 space) = 8 chars
+			yield (8u16, "{}\u00a0{:04x}  ");
+		} else {
+			// (1 char, non-break space, 8 hex, 2 space) = 12 chars
+			yield (12u16, "{}\u00a0{:08x}  ");
+		};
+
+		match(tsize) {
+		case void =>
+			for(let c = min; c <= max; c += 1) {
+				cell(cel_fmt, c);
+			};
+			fmt::println()!;
+		case let ts: tty::ttysize =>
+			// At least one cell per row: a terminal narrower than
+			// a cell would otherwise keep the loop below from advancing.
+			let row_width = (ts.columns / cel_width): u32;
+			if(row_width == 0) {
+				row_width = 1;
+			};
+
+			for(let r = min; r <= max; r += row_width) {
+				for(let c = r; c < r + row_width && c <= max; c += 1) {
+					cell(cel_fmt, c);
+				};
+				fmt::println()!;
+			};
+		};
+	};
+};
+
+// Prints the codepoint of every character in args, in hexadecimal,
+// one per line.
+fn decode(args: []str) void = {
+	for(let i = 0z; i < len(args); i += 1) {
+		const runes = strings::torunes(args[i]);
+		defer free(runes);
+
+		for(let ri = 0z; ri < len(runes); ri += 1) {
+			const r = runes[ri]: u32;
+			if(r > 0xFFFF) {
+				fmt::printfln("{:08x}", r)!;
+			} else {
+				fmt::printfln("{:04x}", r)!;
+			};
+		};
+	};
+};
+
+// Prints the character for each hexadecimal codepoint in args,
+// followed by a newline unless -t was given.
+fn encode(args: []str) void = {
+	for(let i = 0z; i < len(args); i += 1) {
+		const arg = args[i];
+
+		const cp = match(strconv::stou32b(arg, strconv::base::HEX)) {
+		case let u: u32 => yield u;
+		case strconv::overflow =>
+			fmt::fatalf("unicode: Integer overflow with codepoints argument: {}", arg);
+		case strconv::invalid =>
+			fmt::fatalf("unicode: Encoding failed, expected hexadecimal, got: {}", arg);
+		};
+
+		// Surrogates and values past U+10FFFF aren't characters
+		if(!is_scalar(cp)) {
+			fmt::fatalf("unicode: Encoding failed, invalid unicode codepoint: {}", arg);
+		};
+
+		if(text) {
+			fmt::print(cp: rune)!;
+		} else {
+			fmt::println(cp: rune)!;
+		};
+	};
+};
+
+// Parses the command line then runs the table, decode or encode mode.
+export fn main() void = {
+	const cmd = getopt::parse(os::args,
+		"Encode/Decode unicode characters",
+		('n', "Numeric output"),
+		('t', "Running text. Newlines aren't inserted after each character."),
+		"hexmin-hexmax ... | characters ... | codepoints ...",
+	);
+	defer getopt::finish(&cmd);
+
+	for (let i = 0z; i < len(cmd.opts); i += 1) {
+		const opt = cmd.opts[i];
+		switch(opt.0) {
+		case 'n' => numeric = true;
+		case 't' => text = true;
+		case => abort("unhandled option");
+		};
+	};
+
+	if(len(cmd.args) == 0) {
+		getopt::printusage(os::stderr, "unicode", cmd.help)!;
+		return;
+	};
+
+	// -n goes first so that characters containing '-' get decoded
+	// instead of being taken for a hexmin-hexmax range.
+	if(numeric) {
+		decode(cmd.args);
+	} else if(strings::contains(cmd.args[0], '-')) {
+		table(cmd.args);
+	} else {
+		encode(cmd.args);
+	};
+};