commit: cdd45814275b14cbf537947dc0531518ef06d22d
parent 2d39eea38dda2a5febf2574929717f5d85232a55
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Thu, 2 May 2024 11:29:26 +0200
cmd/cmp: new
Diffstat:
A | cmd/cmp.1 | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | cmd/cmp.c | 175 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | test-cmd/cmp.t | 47 | +++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 318 insertions(+), 0 deletions(-)
diff --git a/cmd/cmp.1 b/cmd/cmp.1
@@ -0,0 +1,96 @@
+.\" utils-std: Collection of commonly available Unix tools
+.\" Copyright 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+.\" SPDX-License-Identifier: MPL-2.0
+.Dd 2024-05-02
+.Dt CMP 1
+.Os
+.Sh NAME
+.Nm cmp
+.Nd compare two files
+.Sh SYNOPSIS
+.Nm
+.Op Fl ls
+.Op Fl n Ar max_bytes
+.Ar file1
+.Ar file2
+.Sh DESCRIPTION
+The
+.Nm
+utility checks files
+.Ar file1
+and
+.Ar file2
+for differing bytes and reports their position.
+.Sh OPTIONS
+The following options are supported:
+.Bl -tag -width __
+.It Fl l
+Write the byte number (decimal) and the two differing bytes (octal) instead of the default message.
+.It Fl n Ar max_bytes
+Only compare the first
+.Ar max_bytes .
+.It Fl s
+Write nothing when the files differ; errors are still printed to standard error.
+.El
+.Sh STDOUT
+When a difference is found, the default message format is:
+.Bd -ragged -offset indent -compact
+.Dq %s %s differ: char %d, line %d\en ,
+.Ar file1 ,
+.Ar file2 ,
+.Aq Ar byte position within line ,
+.Aq Ar line position
+.Ed
+.Pp
+When
+.Fl l
+is passed, the format is:
+.Bd -ragged -offset indent -compact
+.Dq %d %o %o\en ,
+.Aq Ar byte position ,
+.Aq Ar file1 byte ,
+.Aq Ar file2 byte
+.Ed
+.Pp
+When
+.Fl s
+is passed, nothing should be printed to standard output.
+.Sh STDERR
+If
+.Fl s
+is not passed, and a file is shorter than the other,
+an error with the following format is printed:
+.Bd -ragged -offset indent
+.Dq cmp: EOF on %s line %ld\en ,
+.Aq Ar name of shorter file ,
+.Aq Ar line number
+.Ed
+.Pp
+Note that this exact format is an implementation choice; the standard only requires the following format:
+.Bd -ragged -offset indent -compact
+.Dq cmp: EOF on %s%s\en ,
+.Aq Ar name of shorter file ,
+.Aq Ar additional info
+.Ed
+Where
+.Aq Ar additional info
+can be empty or have any single-line information starting with a space.
+.Sh EXIT STATUS
+.Bl -tag -width __
+.It 0
+The files are identical.
+.It 1
+The files are different.
+.It >1
+An error occurred.
+.El
+.Sh STANDARDS
+.Nm
+should be compliant with the
+.St -p1003.1-2008
+specification.
+The
+.Fl n Ar max_bytes
+option is an extension.
+.Sh AUTHORS
+.An Haelwenn (lanodan) Monnier Aq Mt contact+utils@hacktivis.me
diff --git a/cmd/cmp.c b/cmd/cmp.c
@@ -0,0 +1,175 @@
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h> // fopen, fprintf, getline
+#include <stdlib.h> // abort, strtoul
+#include <string.h> // strerror
+#include <unistd.h> // getopt
+
+// Command-line flags: opt_s is set by -s (silent), opt_l by -l (list bytes).
+static bool opt_s = false, opt_l = false;
+// Byte limit set by -n; 0 means no limit was requested.
+static unsigned long max_bytes = 0;
+
+// Smaller of two values; both arguments may be evaluated more than once.
+// The #undef presumably guards against a MIN already provided by a system header.
+#undef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+// Compare file1 and file2 byte by byte, honouring opt_s, opt_l and max_bytes.
+//
+// name1 and name2 are only used in messages.
+//
+// Returns 0 when no difference was found (within max_bytes if it is non-zero),
+// 1 when the files differ or one of them ends before the other,
+// 2 when reading either file failed.
+//
+// Comparing bytes rather than whole lines keeps both streams aligned, so a
+// final line without a newline is handled and -l can list every difference.
+static int
+do_cmp(FILE *file1, const char *name1, FILE *file2, const char *name2)
+{
+	unsigned long pos = 1; // byte number within the files, 1-based
+	unsigned long ln  = 1; // line number, 1-based, counted on file1
+	unsigned long col = 1; // byte number within the current line, 1-based
+	int ret = 0;
+
+	// max_bytes == 0 means "no limit"; otherwise bytes 1..max_bytes are compared
+	while(max_bytes == 0 || pos <= max_bytes)
+	{
+		int c1 = getc(file1);
+		if(c1 == EOF && ferror(file1))
+		{
+			fprintf(
+			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln, name1, strerror(errno));
+			return 2;
+		}
+
+		int c2 = getc(file2);
+		if(c2 == EOF && ferror(file2))
+		{
+			fprintf(
+			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln, name2, strerror(errno));
+			return 2;
+		}
+
+		// Both files ended at the same byte
+		if(c1 == EOF && c2 == EOF) break;
+
+		// Exactly one file ended: it is the shorter one
+		if(c1 == EOF || c2 == EOF)
+		{
+			if(!opt_s) fprintf(stderr, "cmp: EOF on %s line %lu\n", (c1 == EOF) ? name1 : name2, ln);
+
+			return 1;
+		}
+
+		if(c1 != c2)
+		{
+			if(opt_s) return 1;
+
+			if(!opt_l)
+			{
+				// NOTE(review): POSIX describes this field as the byte number;
+				// the position within the line is kept because cmp.1 documents it.
+				printf("%s %s differ: char %lu, line %lu\n", name1, name2, col, ln);
+				return 1;
+			}
+
+			// getc() yields values in the unsigned char range, so %o never
+			// sees a sign-extended byte. With -l, keep going to list them all.
+			printf("%lu %o %o\n", pos, (unsigned)c1, (unsigned)c2);
+			ret = 1;
+		}
+
+		if(c1 == '\n')
+		{
+			ln++;
+			col = 1;
+		}
+		else
+			col++;
+
+		pos++;
+	}
+
+	return ret;
+}
+
+// Print the command-line synopsis to stderr.
+static void
+usage(void)
+{
+	fputs("Usage: cmp [-l|-s] [-n max_bytes] file1 file2\n", stderr);
+}
+
+// Entry point: parse the options, open both operands and compare them.
+//
+// An operand of "-" selects standard input.
+// Exit status: 0 when the files are identical, 1 when they differ,
+// 2 on usage, option parsing or file opening errors.
+int
+main(int argc, char *argv[])
+{
+	char *endptr = NULL;
+
+	int c = -1;
+	while((c = getopt(argc, argv, ":ln:s")) != -1)
+	{
+		switch(c)
+		{
+		case 'l':
+			opt_l = true;
+			break;
+		case 's':
+			opt_s = true;
+			break;
+		case 'n':
+			// strtoul only reports range errors through errno, so clear it first
+			errno = 0;
+			max_bytes = strtoul(optarg, &endptr, 0);
+			if(errno != 0)
+			{
+				fprintf(stderr, "cmp: Error: Failed parsing '-n %s': %s\n", optarg, strerror(errno));
+				return 2;
+			}
+			// endptr == optarg: no digits were consumed at all (e.g. empty operand)
+			if(endptr == optarg || endptr[0] != '\0')
+			{
+				fprintf(
+				    stderr, "cmp: Error: Non-numeric characters passed to '-n %s': %s\n", optarg, endptr);
+				return 2;
+			}
+			break;
+		case ':':
+			fprintf(stderr, "cmp: Error: Missing operand for option: '-%c'\n", optopt);
+			usage();
+			return 2;
+		case '?':
+			fprintf(stderr, "cmp: Error: Unrecognised option: '-%c'\n", optopt);
+			usage();
+			return 2;
+		default:
+			abort();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if(argc != 2)
+	{
+		fprintf(stderr, "cmp: Expected 2 arguments, got %d arguments\n", argc);
+		usage();
+		return 2;
+	}
+
+	// Same operand given twice: treated as identical without opening it
+	if(strcmp(argv[0], argv[1]) == 0) return 0;
+
+	FILE *file1 = NULL;
+	if(strcmp(argv[0], "-") == 0)
+		file1 = stdin;
+	else
+	{
+		file1 = fopen(argv[0], "r");
+		if(file1 == NULL)
+		{
+			fprintf(stderr, "cmp: Error opening '%s': %s\n", argv[0], strerror(errno));
+			return 2;
+		}
+	}
+
+	FILE *file2 = NULL;
+	if(strcmp(argv[1], "-") == 0)
+		file2 = stdin;
+	else
+	{
+		file2 = fopen(argv[1], "r");
+		if(file2 == NULL)
+		{
+			fprintf(stderr, "cmp: Error opening '%s': %s\n", argv[1], strerror(errno));
+			fclose(file1);
+			return 2;
+		}
+	}
+
+	int ret = do_cmp(file1, argv[0], file2, argv[1]);
+
+	fclose(file1);
+	fclose(file2);
+
+	return ret;
+}
diff --git a/test-cmd/cmp.t b/test-cmd/cmp.t
@@ -0,0 +1,47 @@
+#!/usr/bin/env cram
+# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+# SPDX-License-Identifier: MPL-2.0
+
+# Put the freshly built commands first in PATH and check that cmp resolves to them
+ $ export PATH="$TESTDIR/../cmd:$PATH"
+
+ $ test "$(command -v cmp)" = "$TESTDIR/../cmd/cmp"
+
+# Single-line files: same operand twice is silent, different content exits 1
+ $ printf 'foo' > foo
+ $ printf 'bar' > bar
+ $ cmp foo foo
+ $ cmp bar bar
+ $ cmp foo bar
+ foo bar differ: char 1, line 1
+ [1]
+ $ cmp -s foo bar
+ [1]
+ $ cmp bar foo
+ bar foo differ: char 1, line 1
+ [1]
+ $ cmp -s bar foo
+ [1]
+
+# bar is a strict prefix of foo: EOF is reported on the shorter file
+ $ seq 1 3 > foo
+ $ seq 1 2 > bar
+ $ cmp foo bar
+ cmp: EOF on bar line 3
+ [1]
+ $ cmp -s foo bar
+ [1]
+ $ echo '3 different' > bar
+ $ cmp foo bar
+ foo bar differ: char 1, line 1
+ [1]
+ $ cmp -s foo bar
+ [1]
+
+# -l with a shorter second file, then -n 4 limited to the identical first 4 bytes
+ $ seq 1 3 > foo
+ $ seq 1 2 > bar
+ $ cmp -l foo bar
+ cmp: EOF on bar line 3
+ [1]
+ $ cmp -n 4 foo bar
+
+# Clean up and make sure no stray files are left behind
+ $ rm foo bar
+ $ find .
+ .