logo

utils-std

Collection of commonly available Unix tools
commit: cdd45814275b14cbf537947dc0531518ef06d22d
parent 2d39eea38dda2a5febf2574929717f5d85232a55
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Thu,  2 May 2024 11:29:26 +0200

cmd/cmp: new

Diffstat:

A cmd/cmp.1 | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/cmp.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test-cmd/cmp.t | 47 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 318 insertions(+), 0 deletions(-)

diff --git a/cmd/cmp.1 b/cmd/cmp.1 @@ -0,0 +1,96 @@ +.\" utils-std: Collection of commonly available Unix tools +.\" Copyright 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +.\" SPDX-License-Identifier: MPL-2.0 +.Dd 2024-05-02 +.Dt CMP 1 +.Os +.Sh NAME +.Nm cmp +.Nd compare two files +.Sh SYNOPSIS +.Nm +.Op Fl ls +.Op Fl n Ar max_bytes +.Ar file1 +.Ar file2 +.Sh DESCRIPTION +The +.Nm +utility checks files +.Ar file1 +and +.Ar file2 +for a different byte and prints it out. +.Sh OPTIONS +The following options are supported: +.Bl -tag -width __ +.It Fl l +Write the byte number and if found, the two bytes found different. +.It Fl n Ar max_bytes +Only compare the first +.Ar max_bytes . +.It Fl s +Write nothing when files differ, errors are still printed out to stderr. +.El +.Sh STDOUT +When a difference is found, the default message format is: +.Bd -ragged -offset indent -compact +.Dq %s %s differ: char %d, line %d\en , +.Ar file1 , +.Ar file2 , +.Aq Ar byte position within line +.Aq Ar line position +.Ed +.Pp +When +.Fl l +is passed, the format is: +.Bd -ragged -offset indent -compact +.Dq %d %o %o\en , +.Aq Ar byte position , +.Aq Ar file1 byte , +.Aq Ar file2 byte +.Ed +.Pp +When +.Fl s +is passed, nothing should be printed to standard output. +.Sh STDERR +If +.Fl s +is not passed, and a file is shorter than the other, +an error with the following format is printed: +.Bd -ragged -offset indent +.Dq cmp: EOF on %s line %ld\en , +.Aq Ar name of shorter file , +.Aq Ar line number +.Ed +.Pp +Note that unlike the other formats, only the following format is standard: +.Bd -ragged -offset indent -compact +.Dq cmp: EOF on %s%s\en , +.Aq Ar name of shorter file , +.Aq Ar additional info +.Ed +Where +.Aq Ar additional info +can be empty or have any single-line information starting with a space. +.Sh EXIT STATUS +.Bl -tag -width __ +.It 0 +The files are identical. +.It 1 +The files are different. +.It >1 +An error occurred. 
/*
 * Text that preceded the C source on this line of the commit page (the end of
 * the cmd/cmp.1 hunk and the cmd/cmp.c diff header), kept verbatim:
 *
 * +.El
 * +.Sh STANDARDS
 * +.Nm
 * +should be compliant with the
 * +.St -p1003.1-2008
 * +specification.
 * +The
 * +.Fl n Ar max_bytes
 * +option is an extension.
 * +.Sh AUTHORS
 * +.An Haelwenn (lanodan) Monnier Aq Mt contact+utils@hacktivis.me
 * diff --git a/cmd/cmp.c b/cmd/cmp.c
 * @@ -0,0 +1,175 @@
 */
// utils-std: Collection of commonly available Unix tools
// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
// SPDX-License-Identifier: MPL-2.0

#define _POSIX_C_SOURCE 200809L
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>  // fopen, fprintf, getc
#include <stdlib.h> // abort, strtoul
#include <string.h> // strerror
#include <unistd.h> // getopt

// -s: print nothing for differing files; -l: print byte number and both bytes.
static bool opt_s = false, opt_l = false;
// -n: number of leading bytes to compare; 0 means "no limit".
static unsigned long max_bytes = 0;

/*
 * Compare two streams byte by byte.
 *
 * file1/file2: streams to read, compared from their current position.
 * name1/name2: operand names, used only in messages.
 *
 * Returns 0 when the compared bytes are identical, 1 when a byte differs or
 * one stream ends before the other, 2 when reading either stream fails.
 *
 * Honours the file-scope options: opt_s silences the "differ"/"EOF" messages,
 * opt_l selects the "<byte number> <octal> <octal>" format, and a non-zero
 * max_bytes limits the comparison to that many leading bytes.
 */
static int
do_cmp(FILE *file1, const char *name1, FILE *file2, const char *name2)
{
	unsigned long pos = 1; // 1-based byte number within the streams
	unsigned long col = 1; // 1-based byte number within the current line
	unsigned long ln  = 1; // 1-based line number

	for(;; pos++)
	{
		// Checked before reading, so exactly max_bytes bytes get compared and
		// nothing past the limit (including a length mismatch) is reported.
		if(max_bytes != 0 && pos > max_bytes) return 0;

		errno  = 0;
		int c1 = getc(file1);
		if(c1 == EOF && ferror(file1))
		{
			fprintf(
			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln, name1, strerror(errno));
			return 2;
		}

		errno  = 0;
		int c2 = getc(file2);
		if(c2 == EOF && ferror(file2))
		{
			fprintf(
			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln, name2, strerror(errno));
			return 2;
		}

		if(c1 == EOF || c2 == EOF)
		{
			// Both ended together: every byte matched.
			if(c1 == c2) return 0;

			// Exactly one stream ended: that one is the shorter file.
			if(!opt_s) fprintf(stderr, "cmp: EOF on %s line %lu\n", c1 == EOF ? name1 : name2, ln);

			return 1;
		}

		if(c1 != c2)
		{
			if(opt_s) return 1;

			// getc() yields unsigned char values, so bytes >= 0x80 print as
			// 3-digit octal instead of a sign-extended number.
			if(opt_l)
				printf("%lu %o %o\n", pos, (unsigned)c1, (unsigned)c2);
			else
				printf("%s %s differ: char %lu, line %lu\n", name1, name2, col, ln);

			// NOTE(review): POSIX describes -l as listing every differing byte;
			// the existing stop-at-first-difference behaviour is kept here.
			return 1;
		}

		if(c1 == '\n')
		{
			ln++;
			col = 1;
		}
		else
			col++;
	}
}
+static void +usage() +{ + fprintf(stderr, "Usage: cmp [-l|-s] [-n max_bytes] file1 file2\n"); +} + +int +main(int argc, char *argv[]) +{ + char *endptr = NULL; + + int c = -1; + while((c = getopt(argc, argv, ":ln:s")) != -1) + { + switch(c) + { + case 'l': + opt_l = true; + break; + case 's': + opt_s = true; + break; + case 'n': + errno = 0; + max_bytes = strtoul(optarg, &endptr, 0); + if(errno != 0) + { + fprintf(stderr, "cmp: Error: Failed parsing '-n %s': %s\n", optarg, strerror(errno)); + return 1; + } + if(endptr != NULL && endptr[0] != 0) + { + fprintf( + stderr, "cmp: Error: Non-numeric characters passed to '-n %s': %s\n", optarg, endptr); + return 1; + } + break; + case ':': + fprintf(stderr, "cmp: Error: Missing operand for option: '-%c'\n", optopt); + usage(); + return 1; + case '?': + fprintf(stderr, "cmp: Error: Unrecognised option: '-%c'\n", optopt); + usage(); + return 1; + default: + abort(); + } + } + + argc -= optind; + argv += optind; + + if(argc != 2) + { + fprintf(stderr, "cmp: Expected 2 arguments, got %d arguments\n", argc); + return 1; + } + + if(strcmp(argv[0], argv[1]) == 0) return 0; + + assert(errno == 0); + + FILE *file1 = NULL; + if(argv[0][0] == '-' && argv[0][1] == 0) + file1 = stdin; + else + { + file1 = fopen(argv[0], "r"); + if(file1 == NULL) + { + fprintf(stderr, "cmp: Error opening ā€˜%sā€™: %s\n", argv[0], strerror(errno)); + return 1; + } + } + + FILE *file2 = NULL; + if(argv[1][0] == '-' && argv[1][1] == 0) + file2 = stdin; + else + { + file2 = fopen(argv[1], "r"); + if(file2 == NULL) + { + fprintf(stderr, "cmp: Error opening ā€˜%sā€™: %s\n", argv[1], strerror(errno)); + return 1; + } + } + + int ret = do_cmp(file1, argv[0], file2, argv[1]); + + fclose(file1); + fclose(file2); + + return ret; +} diff --git a/test-cmd/cmp.t b/test-cmd/cmp.t @@ -0,0 +1,47 @@ +#!/usr/bin/env cram +# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +# SPDX-License-Identifier: MPL-2.0 + + $ export 
PATH="$TESTDIR/../cmd:$PATH" + + $ test "$(command -v cmp)" = "$TESTDIR/../cmd/cmp" + + $ printf 'foo' > foo + $ printf 'bar' > bar + $ cmp foo foo + $ cmp bar bar + $ cmp foo bar + foo bar differ: char 1, line 1 + [1] + $ cmp -s foo bar + [1] + $ cmp bar foo + bar foo differ: char 1, line 1 + [1] + $ cmp -s bar foo + [1] + + $ seq 1 3 > foo + $ seq 1 2 > bar + $ cmp foo bar + cmp: EOF on bar line 3 + [1] + $ cmp -s foo bar + [1] + $ echo '3 different' > bar + $ cmp foo bar + foo bar differ: char 1, line 1 + [1] + $ cmp -s foo bar + [1] + + $ seq 1 3 > foo + $ seq 1 2 > bar + $ cmp -l foo bar + cmp: EOF on bar line 3 + [1] + $ cmp -n 4 foo bar + + $ rm foo bar + $ find . + .