commit: cdd45814275b14cbf537947dc0531518ef06d22d
parent 2d39eea38dda2a5febf2574929717f5d85232a55
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Thu,  2 May 2024 11:29:26 +0200
cmd/cmp: new
Diffstat:
| A | cmd/cmp.1 | 96 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| A | cmd/cmp.c | 175 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| A | test-cmd/cmp.t | 47 | +++++++++++++++++++++++++++++++++++++++++++++++ | 
3 files changed, 318 insertions(+), 0 deletions(-)
diff --git a/cmd/cmp.1 b/cmd/cmp.1
@@ -0,0 +1,96 @@
+.\" utils-std: Collection of commonly available Unix tools
+.\" Copyright 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+.\" SPDX-License-Identifier: MPL-2.0
+.Dd 2024-05-02
+.Dt CMP 1
+.Os
+.Sh NAME
+.Nm cmp
+.Nd compare two files
+.Sh SYNOPSIS
+.Nm
+.Op Fl ls
+.Op Fl n Ar max_bytes
+.Ar file1
+.Ar file2
+.Sh DESCRIPTION
+The
+.Nm
+utility checks files
+.Ar file1
+and
+.Ar file2
+for the first differing byte and reports its location.
+.Sh OPTIONS
+The following options are supported:
+.Bl -tag -width __
+.It Fl l
+Write the byte number (decimal) and the two differing bytes (octal) instead of the default message.
+.It Fl n Ar max_bytes
+Only compare the first
+.Ar max_bytes .
+.It Fl s
+Write nothing when the files differ; errors are still printed to standard error.
+.El
+.Sh STDOUT
+When a difference is found, the default message format is:
+.Bd -ragged -offset indent -compact
+.Dq %s %s differ: char %d, line %d\en ,
+.Ar file1 ,
+.Ar file2 ,
+.Aq Ar byte position within line ,
+.Aq Ar line number
+.Ed
+.Pp
+When
+.Fl l
+is passed, the format is:
+.Bd -ragged -offset indent -compact
+.Dq %d %o %o\en ,
+.Aq Ar byte position ,
+.Aq Ar file1 byte ,
+.Aq Ar file2 byte
+.Ed
+.Pp
+When
+.Fl s
+is passed, nothing should be printed to standard output.
+.Sh STDERR
+If
+.Fl s
+is not passed and one file is shorter than the other,
+an error with the following format is printed:
+.Bd -ragged -offset indent
+.Dq cmp: EOF on %s line %ld\en ,
+.Aq Ar name of shorter file ,
+.Aq Ar line number
+.Ed
+.Pp
+Note that, unlike the formats above, the standard only specifies the following general form for this message:
+.Bd -ragged -offset indent -compact
+.Dq cmp: EOF on %s%s\en ,
+.Aq Ar name of shorter file ,
+.Aq Ar additional info
+.Ed
+Where
+.Aq Ar additional info
+can be empty or have any single-line information starting with a space.
+.Sh EXIT STATUS
+.Bl -tag -width __
+.It 0
+The files are identical.
+.It 1
+The files are different.
+.It >1
+An error occurred.
+.El
+.Sh STANDARDS
+.Nm
+should be compliant with the
+.St -p1003.1-2008
+specification.
+The
+.Fl n Ar max_bytes
+option is an extension.
+.Sh AUTHORS
+.An Haelwenn (lanodan) Monnier Aq Mt contact+utils@hacktivis.me
diff --git a/cmd/cmp.c b/cmd/cmp.c
@@ -0,0 +1,175 @@
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>  // fopen, fprintf, getline
+#include <stdlib.h> // abort, strtoul
+#include <string.h> // strerror
+#include <unistd.h> // getopt
+
+static bool opt_s = false, opt_l = false; // set by -s (no output on difference) and -l (list differing bytes)
+static unsigned long max_bytes = 0; // value of -n; 0 means no limit
+
+#undef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) // evaluates each argument twice: no side effects in a or b
+
+// Compare file1 and file2 byte by byte.
+//
+// name1 and name2 are only used in messages.
+// Globals read: opt_s (print nothing when the files differ or one is shorter),
+// opt_l (list every differing byte instead of stopping at the first one) and
+// max_bytes (when non-zero, compare at most that many bytes).
+//
+// Returns 0 when no difference was found, 1 when the files differ or one of
+// them is shorter than the other, 2 when reading from either file failed.
+static int
+do_cmp(FILE *file1, const char *name1, FILE *file2, const char *name2)
+{
+	unsigned long pos = 1;          // 1-based offset of the next byte from the start of the files
+	unsigned long col = 1;          // 1-based offset of the next byte within the current line
+	unsigned long ln1 = 1, ln2 = 1; // current line number in each file
+	int ret           = 0;
+
+	while(true)
+	{
+		// Stop before reading past the -n limit, so that a file ending
+		// right after the limit is not reported as shorter.
+		if(max_bytes != 0 && pos > max_bytes) break;
+
+		errno  = 0;
+		int c1 = getc(file1);
+		if(c1 == EOF && ferror(file1))
+		{
+			fprintf(
+			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln1, name1, strerror(errno));
+			return 2;
+		}
+
+		errno  = 0;
+		int c2 = getc(file2);
+		if(c2 == EOF && ferror(file2))
+		{
+			fprintf(
+			    stderr, "cmp: Failed to read line %lu from file '%s': %s\n", ln2, name2, strerror(errno));
+			return 2;
+		}
+
+		if(c1 == EOF || c2 == EOF)
+		{
+			// Both files ended together: result depends on differences listed so far (-l)
+			if(c1 == EOF && c2 == EOF) break;
+
+			if(!opt_s)
+			{
+				const char *shorter = (c1 == EOF) ? name1 : name2;
+				unsigned long ln    = (c1 == EOF) ? ln1 : ln2;
+				fprintf(stderr, "cmp: EOF on %s line %lu\n", shorter, ln);
+			}
+
+			return 1;
+		}
+
+		if(c1 != c2)
+		{
+			ret = 1;
+
+			if(opt_s) break;
+
+			if(!opt_l)
+			{
+				// Everything before this byte was identical, so col and ln1 apply to both files
+				printf("%s %s differ: char %lu, line %lu\n", name1, name2, col, ln1);
+				break;
+			}
+
+			// getc() returns the byte as an unsigned char value, so %o never sees a negative
+			printf("%lu %o %o\n", pos, (unsigned int)c1, (unsigned int)c2);
+		}
+
+		pos++;
+
+		if(c1 == '\n')
+		{
+			ln1++;
+			col = 1;
+		}
+		else
+			col++;
+
+		if(c2 == '\n') ln2++;
+	}
+
+	return ret;
+}
+
+static void
+usage()
+{
+	fprintf(stderr, "Usage: cmp [-l|-s] [-n max_bytes] file1 file2\n");
+}
+
+// Open the operand path for reading; "-" selects standard input.
+// Prints a message on standard error and returns NULL when opening fails.
+static FILE *
+open_operand(const char *path)
+{
+	if(path[0] == '-' && path[1] == 0) return stdin;
+
+	FILE *file = fopen(path, "r");
+	if(file == NULL) fprintf(stderr, "cmp: Error opening '%s': %s\n", path, strerror(errno));
+
+	return file;
+}
+
+// Entry point: parse -l, -s and -n max_bytes, then compare the two operands.
+//
+// Exit status: 0 when the files are identical, 1 when they differ,
+// 2 on usage or I/O errors (so that errors are never mistaken for "differ").
+int
+main(int argc, char *argv[])
+{
+	int c = -1;
+	while((c = getopt(argc, argv, ":ln:s")) != -1)
+	{
+		switch(c)
+		{
+		case 'l':
+			opt_l = true;
+			break;
+		case 's':
+			opt_s = true;
+			break;
+		case 'n':
+		{
+			char *endptr = NULL;
+
+			errno     = 0;
+			max_bytes = strtoul(optarg, &endptr, 0);
+			if(errno != 0)
+			{
+				fprintf(stderr, "cmp: Error: Failed parsing '-n %s': %s\n", optarg, strerror(errno));
+				return 2;
+			}
+			// endptr == optarg: nothing was converted (e.g. empty argument)
+			if(endptr == optarg || endptr[0] != 0)
+			{
+				fprintf(
+				    stderr, "cmp: Error: Non-numeric characters passed to '-n %s': %s\n", optarg, endptr);
+				return 2;
+			}
+			break;
+		}
+		case ':':
+			fprintf(stderr, "cmp: Error: Missing operand for option: '-%c'\n", optopt);
+			usage();
+			return 2;
+		case '?':
+			fprintf(stderr, "cmp: Error: Unrecognised option: '-%c'\n", optopt);
+			usage();
+			return 2;
+		default:
+			abort();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if(argc != 2)
+	{
+		fprintf(stderr, "cmp: Expected 2 arguments, got %d arguments\n", argc);
+		return 2;
+	}
+
+	// Same operand twice: nothing to compare
+	if(strcmp(argv[0], argv[1]) == 0) return 0;
+
+	FILE *file1 = open_operand(argv[0]);
+	if(file1 == NULL) return 2;
+
+	FILE *file2 = open_operand(argv[1]);
+	if(file2 == NULL)
+	{
+		fclose(file1);
+		return 2;
+	}
+
+	int ret = do_cmp(file1, argv[0], file2, argv[1]);
+
+	fclose(file1);
+	fclose(file2);
+
+	return ret;
+}
diff --git a/test-cmd/cmp.t b/test-cmd/cmp.t
@@ -0,0 +1,47 @@
+#!/usr/bin/env cram
+# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+# SPDX-License-Identifier: MPL-2.0
+
+# Put the freshly built commands first in PATH and check that cmp resolves to it
+  $ export PATH="$TESTDIR/../cmd:$PATH"
+
+  $ test "$(command -v cmp)" = "$TESTDIR/../cmd/cmp"
+
+# Single-line files without trailing newline: same operand, then a difference at the first byte
+  $ printf 'foo' > foo
+  $ printf 'bar' > bar
+  $ cmp foo foo
+  $ cmp bar bar
+  $ cmp foo bar
+  foo bar differ: char 1, line 1
+  [1]
+  $ cmp -s foo bar
+  [1]
+  $ cmp bar foo
+  bar foo differ: char 1, line 1
+  [1]
+  $ cmp -s bar foo
+  [1]
+
+# bar is a prefix of foo: EOF is reported for bar; then bar differs from foo on its first line
+  $ seq 1 3 > foo
+  $ seq 1 2 > bar
+  $ cmp foo bar
+  cmp: EOF on bar line 3
+  [1]
+  $ cmp -s foo bar
+  [1]
+  $ echo '3 different' > bar
+  $ cmp foo bar
+  foo bar differ: char 1, line 1
+  [1]
+  $ cmp -s foo bar
+  [1]
+
+# -l still reports EOF on the shorter file; -n 4 only looks at the 4 bytes both files share
+  $ seq 1 3 > foo
+  $ seq 1 2 > bar
+  $ cmp -l foo bar
+  cmp: EOF on bar line 3
+  [1]
+  $ cmp -n 4 foo bar
+
+# Cleanup: the test directory must be left empty
+  $ rm foo bar
+  $ find .
+  .