commit: 2261098f98f1d00a21eb420522ad8dae17e00e2a
parent d6e49c181cd115cc2df89dd06cd2f11cd2564137
Author: fosslinux <fosslinux@aussies.space>
Date: Sat, 24 Feb 2024 09:37:32 +0000
Merge pull request #445 from fosslinux/check-substitutes
Check "substitutes"
Diffstat:
6 files changed, 182 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -21,7 +21,7 @@ jobs:
- name: Install pylint
run: sudo pip3 install pylint
- name: pylint
- run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py --disable=duplicate-code
+ run: pylint rootfs.py lib/utils.py lib/generator.py lib/target.py check_substitutes.py --disable=duplicate-code
shellcheck:
name: Lint shell files
@@ -42,3 +42,12 @@ jobs:
run: sudo pip3 install reuse
- name: reuse
run: reuse lint
+
+ substitutes:
+ name: Check validity of substituted tarballs
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v3
+ - name: Check validity of substitutes
+ run: ./check_substitutes.py
diff --git a/check_substitutes.py b/check_substitutes.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# SPDX-FileCopyrightText: 2024 fosslinux <fosslinux@aussies.space>
+
+"""Check that substituted files are the same."""
+import bz2
+import filecmp
+import gzip
+import itertools
+import lzma
+import shutil
+import tarfile
+import tempfile
+import sys
+import os
+
+from lib.generator import Generator
+
+# Get a temporary directory to work in
+# Distfile.download() saves files here, and Distfile.extract() creates its
+# output directories underneath it.
+# NOTE(review): nothing in the visible part of this script removes the
+# directory again, so it is left behind after the run.
+working = tempfile.mkdtemp()
+
+# Colour constants
+# pylint: disable=too-few-public-methods
+class Colors():
+ """ANSI escape sequences used to colour the messages printed by check()."""
+ # Used for DEBUG messages
+ GREY = "\033[90m"
+ # Used for ERROR messages
+ RED = "\033[91m"
+ # Not referenced in the visible part of this script
+ GREEN = "\033[92m"
+ # Used for WARN messages
+ # NOTE(review): this is the RED sequence followed by the YELLOW sequence;
+ # presumably a terminal applies the later one, so WARN may look the same as
+ # NOTE -- confirm.
+ ORANGE = "\033[91m\033[93m"
+ # Used for NOTE messages
+ YELLOW = "\033[93m"
+ # Appended to every coloured message to reset the colour
+ END = "\033[0m"
+
+def traverse_path(base_root):
+    """Collect every directory and file found beneath ``base_root``.
+
+    Paths are returned relative to ``base_root`` so that the results for two
+    different roots can be compared with each other and joined back onto
+    either root.
+
+    Args:
+        base_root: Directory to walk.
+
+    Returns:
+        A ``(directories, files)`` tuple of sets of relative paths.
+    """
+    all_dirs = set()
+    all_files = set()
+    for root, directories, files in os.walk(base_root, topdown=True):
+        # os.path.relpath removes exactly the base_root prefix. str.lstrip must
+        # not be used for this: it strips any leading run of the *characters*
+        # in its argument, which also eats the start of the entry names.
+        rel_root = os.path.relpath(root, base_root)
+        for name in directories:
+            all_dirs.add(os.path.normpath(os.path.join(rel_root, name)))
+        for name in files:
+            all_files.add(os.path.normpath(os.path.join(rel_root, name)))
+    return (all_dirs, all_files)
+
+class Distfile():
+    """Represents one distfile and operations performed on it.
+
+    Instance fields:
+      i        -- index of the URL within its line of the substitutes file
+      url      -- URL the distfile is downloaded from
+      out_file -- file name used for the download inside the working directory
+      filepath -- current location on disk; empty until download() has run,
+                  then updated by decompress() and extract()
+    """
+    def __init__(self, i, url):
+        self.i = i
+        self.url = url
+        self.out_file = f"{i}-{os.path.basename(url)}"
+        self.filepath = ""
+
+    def download(self):
+        """Downloads the distfile into the working directory."""
+        Generator.download_file(self.url, working, self.out_file, silent=True)
+        self.filepath = os.path.join(working, self.out_file)
+
+    def decompress(self):
+        """Decompresses the distfile if its name has a known compression suffix.
+
+        The decompressed data is written next to the compressed file and
+        ``filepath`` is updated to point at it. A file whose suffix is not
+        recognised is left untouched.
+        """
+        compression = self.out_file.split('.')[-1]
+        decompress_func = {
+            "gz": gzip.open,
+            "tgz": gzip.open,
+            "bz2": bz2.open,
+            "xz": lzma.open,
+            "lzma": lzma.open
+        }
+        if compression not in decompress_func:
+            # No decompression needed
+            return
+        # Remove the compression extension
+        new_path = '.'.join(self.filepath.split('.')[:-1])
+        # tgz -> .tar
+        if compression == "tgz":
+            new_path = f"{new_path}.tar"
+        # Move the decompressed binary stream to a new file
+        with decompress_func[compression](self.filepath, 'rb') as fin:
+            with open(new_path, 'wb') as fout:
+                shutil.copyfileobj(fin, fout)
+        self.filepath = new_path
+
+    def extract(self):
+        """Extracts the distfile if it is a tar archive.
+
+        The archive is unpacked into a new directory inside the working
+        directory and ``filepath`` is updated to that directory. Anything that
+        is not a tar archive is left untouched.
+        """
+        # Sanity check
+        if not tarfile.is_tarfile(self.filepath):
+            return
+        # The index restarts at 0 for every line of the substitutes file, so a
+        # fixed "<working>/<i>" directory may already exist from an earlier
+        # line. Always ask for a fresh directory instead.
+        out_dir = tempfile.mkdtemp(prefix=f"{self.i}-", dir=working)
+        with tarfile.open(self.filepath, 'r') as f:
+            if hasattr(tarfile, "data_filter"):
+                # The archive was downloaded, so refuse members that would be
+                # written outside out_dir. Extraction filters only exist on
+                # newer Python releases, hence the feature check.
+                f.extractall(path=out_dir, filter="data")
+            else:
+                f.extractall(path=out_dir)
+        self.filepath = out_dir
+
+    # It makes more sense here to label them d1 and d2 rather than have one be self.
+    # pylint: disable=no-self-argument
+    def compare(d1, d2):
+        """Compares the distfile to another distfile.
+
+        Two plain files are compared byte for byte. Two directories are equal
+        when they contain the same directory names, the same file names, and
+        every file has identical contents. A file never equals a directory.
+
+        Returns:
+            True if the two distfiles are equivalent, False otherwise.
+        """
+        if not os.path.isdir(d1.filepath):
+            # Compare files
+            return filecmp.cmp(d1.filepath, d2.filepath, shallow=False)
+        if not os.path.isdir(d2.filepath):
+            # Then, d2 is a file and d1 is a directory
+            return False
+        # Otherwise it's two directories
+        dirnames1, filenames1 = traverse_path(d1.filepath)
+        dirnames2, filenames2 = traverse_path(d2.filepath)
+        if dirnames1 != dirnames2:
+            return False
+        if filenames1 != filenames2:
+            return False
+        # cmpfiles returns three lists (match, mismatch, errors). The tuple
+        # itself is always truthy, so the lists have to be inspected.
+        _, mismatch, errors = filecmp.cmpfiles(
+            d1.filepath, d2.filepath, filenames1, shallow=False)
+        return not mismatch and not errors
+
+def check(*args):
+    """Report whether any two of the given distfiles differ.
+
+    Every pair is compared up to three times: as downloaded, after
+    decompression, and after extraction. A pair that matches at one stage is
+    reported at that stage and not examined again.
+
+    Returns:
+        True if at least one pair still differs after extraction.
+    """
+    def involved(pairs):
+        """Return the distinct distfiles appearing in any of the pairs."""
+        return {distfile for pair in pairs for distfile in pair}
+
+    # Stage 1: compare the files exactly as they were downloaded
+    differing = []
+    for first, second in itertools.combinations(args, 2):
+        if not first.compare(second):
+            differing.append((first, second))
+            continue
+        print(f"{Colors.GREY}DEBUG: {first.url} is equivalent to {second.url}{Colors.END}")
+
+    # Stage 2: decompress everything that is still in doubt and compare again
+    for distfile in involved(differing):
+        distfile.decompress()
+    still_differing = []
+    for first, second in differing:
+        if not first.compare(second):
+            still_differing.append((first, second))
+            continue
+        # pylint: disable=line-too-long
+        print(f"{Colors.YELLOW}NOTE: {first.url} is equivalent to {second.url} when decompressed{Colors.END}")
+
+    # Stage 3: extract what is left; a mismatch here is an error
+    for distfile in involved(still_differing):
+        distfile.extract()
+    has_error = False
+    for first, second in still_differing:
+        # pylint: disable=line-too-long
+        if first.compare(second):
+            print(f"{Colors.ORANGE}WARN: {first.url} is equivalent to {second.url} when extracted{Colors.END}")
+            continue
+        has_error = True
+        print(f"{Colors.RED}ERROR: {first.url} is not equivalent to {second.url}!{Colors.END}")
+
+    return has_error
+
+def main():
+    """Check every line of the ``substitutes`` file.
+
+    Each non-blank line holds whitespace-separated URLs that are expected to
+    provide equivalent distfiles. All of them are downloaded and handed to
+    check(). The process exits with status 1 if any line contains a pair that
+    is not equivalent, and with status 0 otherwise.
+    """
+    has_error = False
+    with open("substitutes", 'r', encoding="utf-8") as f:
+        for line in f:
+            # split() without a separator ignores leading, trailing and
+            # repeated whitespace, so it never yields an empty URL.
+            urls = line.split()
+            if not urls:
+                # Blank line: nothing to download or compare
+                continue
+            distfiles = [Distfile(i, url) for i, url in enumerate(urls)]
+            for distfile in distfiles:
+                distfile.download()
+            if check(*distfiles):
+                has_error = True
+    sys.exit(1 if has_error else 0)
+
+if __name__ == "__main__":
+ main()
diff --git a/lib/generator.py b/lib/generator.py
@@ -274,7 +274,7 @@ When in doubt, try deleting the file in question -- it will be downloaded again
this script the next time")
@staticmethod
- def download_file(url, directory, file_name):
+ def download_file(url, directory, file_name, silent=False):
"""
Download a single source archive.
"""
@@ -290,7 +290,8 @@ this script the next time")
"User-Agent": "curl/7.88.1"
}
if not os.path.isfile(abs_file_name):
- print(f"Downloading: {file_name}")
+ if not silent:
+ print(f"Downloading: {file_name}")
response = requests.get(url, allow_redirects=True, stream=True,
headers=headers, timeout=20)
if response.status_code == 200:
diff --git a/steps/diffutils-2.7/sources b/steps/diffutils-2.7/sources
@@ -1 +1 @@
-https://dept.rpi.edu/acm/packages/diffutils/2.7/distrib/diffutils-2.7.tar.bz2 fd6c44f7cbd0a942a3f0c012365997965451197ad4faeb0b8aac1fe03192de58
+https://mirrors.kernel.org/gnu/diffutils/diffutils-2.7.tar.gz d5f2489c4056a31528e3ada4adacc23d498532b0af1a980f2f76158162b139d6
diff --git a/substitutes b/substitutes
@@ -0,0 +1,3 @@
+https://mirrors.kernel.org/gnu/bash/bash-2.05b.tar.gz https://src.fedoraproject.org/repo/pkgs/bash/bash-2.05b.tar.bz2/f3e5428ed52a4f536f571a945d5de95d/bash-2.05b.tar.bz2
+https://mirrors.kernel.org/gnu/bc/bc-1.07.1.tar.gz https://mirrors.kernel.org/slackware/slackware64-15.0/source/ap/bc/bc-1.07.1.tar.xz
+https://mirrors.kernel.org/gnu/ed/ed-1.4.tar.gz https://mirrors.kernel.org/slackware/slackware-13.37/source/a/ed/ed-1.4.tar.xz
diff --git a/substitutes.license b/substitutes.license
@@ -0,0 +1,3 @@
+SPDX-FileCopyrightText: 2024 fosslinux <fosslinux@aussies.space>
+
+SPDX-License-Identifier: GPL-3.0-or-later