From 2646457151814493d91f1127a757ef7a7c92293a Mon Sep 17 00:00:00 2001 From: Gunter Schmidt Date: Sat, 11 Apr 2026 06:23:40 +0200 Subject: [PATCH] Feat: Added Spell-Checker and fixed spellings For Code Spell Checker extension. Fixed some clippy warnings. --- .vscode/cSpell.json | 62 +++ .../acronyms+names.wordlist.txt | 80 ++++ .../cspell.dictionaries/jargon.wordlist.txt | 249 +++++++++++ .../cspell.dictionaries/people.wordlist.txt | 9 + .../cspell.dictionaries/shell.wordlist.txt | 123 ++++++ .../workspace.wordlist.txt | 402 ++++++++++++++++++ benches/bench-diffutils.rs | 14 +- src/cmp.rs | 4 +- src/context_diff.rs | 2 + src/ed_diff.rs | 2 + src/macros.rs | 9 +- src/normal_diff.rs | 2 + src/params.rs | 7 + src/side_diff.rs | 2 + src/unified_diff.rs | 2 + src/utils.rs | 6 +- 16 files changed, 964 insertions(+), 11 deletions(-) create mode 100644 .vscode/cSpell.json create mode 100644 .vscode/cspell.dictionaries/acronyms+names.wordlist.txt create mode 100644 .vscode/cspell.dictionaries/jargon.wordlist.txt create mode 100644 .vscode/cspell.dictionaries/people.wordlist.txt create mode 100644 .vscode/cspell.dictionaries/shell.wordlist.txt create mode 100644 .vscode/cspell.dictionaries/workspace.wordlist.txt diff --git a/.vscode/cSpell.json b/.vscode/cSpell.json new file mode 100644 index 0000000..a5feec3 --- /dev/null +++ b/.vscode/cSpell.json @@ -0,0 +1,62 @@ +// `cspell` settings +// spell-checker:ignore oranda +{ + // version of the setting file + "version": "0.2", + // spelling language + "language": "en", + // custom dictionaries + "dictionaries": [ + "acronyms+names", + "jargon", + "people", + "shell", + "workspace" + ], + "dictionaryDefinitions": [ + { + "name": "acronyms+names", + "path": "./cspell.dictionaries/acronyms+names.wordlist.txt" + }, + { + "name": "jargon", + "path": "./cspell.dictionaries/jargon.wordlist.txt" + }, + { + "name": "people", + "path": "./cspell.dictionaries/people.wordlist.txt" + }, + { + "name": "shell", + "path": 
"./cspell.dictionaries/shell.wordlist.txt" + }, + { + "name": "workspace", + "path": "./cspell.dictionaries/workspace.wordlist.txt" + } + ], + // files to ignore (globs supported) + "ignorePaths": [ + ".git/**", + "Cargo.lock", + "oranda.json", + "target/**", + "tests/**/fixtures/**", + "src/uu/dd/test-resources/**", + "vendor/**", + "**/*.svg", + "src/uu/*/locales/*.ftl", + "src/uudiff/locales/*.ftl", + ".devcontainer/**", + "util/gnu-patches/**", + "docs/src/release-notes/**", + "src/uu/*/benches/*.rs", + "src/uudiff/src/lib/features/benchmark.rs", + "util/check-safe-traversal.sh", + ], + "enableGlobDot": true, + // words to ignore (even if they are in the flagWords) + "ignoreWords": [], + // words to always consider correct + "words": [] +} \ No newline at end of file diff --git a/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt new file mode 100644 index 0000000..bdfcc26 --- /dev/null +++ b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt @@ -0,0 +1,80 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +# * abbreviations / acronyms +aarch +AIX +ASLR # address space layout randomization +AST # abstract syntax tree +CATN # busybox cat -n feature flag +CATV # busybox cat -v feature flag +CICD # continuous integration/deployment +CPU +CPUs +DevOps +Ext3 +FIFO +FIFOs +flac +FQDN # fully qualified domain name +GID # group ID +GIDs +GNU +GNUEABI +GNUEABIhf +impls +JFS +loongarch +lzma +MSRV # minimum supported rust version +MSVC +NixOS +POSIX +POSIXLY +ReiserFS +RISC +RISCV +RNG # random number generator +RNGs +Solaris +TOCTOU # time-of-check time-of-use +UID # user ID +UIDs +UUID # universally unique identifier +WASI +WASM +XFS + +# * names +BusyBox +BusyTest +Codacy +Cygwin +Deno +EditorConfig +EPEL +FreeBSD +genric +Gmail +Illumos +Irix +libfuzzer +MacOS +MinGW +Minix +MS-DOS +MSDOS +NetBSD +Novell +Nushell +OpenBSD +PowerPC +SELinux +SkyPack +SysV +Xenix +Yargs 
+ +# Product +codspeed diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt new file mode 100644 index 0000000..e814469 --- /dev/null +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -0,0 +1,249 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +AFAICT +asimd +ASIMD +alloc +arity +autogenerate +autogenerated +autogenerates +bitmask +bitwise +bufferram +bytewise +canonicalization +canonicalize +canonicalizing +capget +codepoint +codepoints +codeready +codegen +colorizable +colorize +coprime +consts +conv +cyclomatic +dedup +deduplication +demangle +denoland +deque +dequeue +dev +EINTR +eintr +nextest +SIGUSR +nonprinting +multibyte +devs +discoverability +duplicative +dsync +endianness +enqueue +ERANGE +errored +executable +executables +exponentiate +eval +esac +falsey +fileio +filesystem +filesystems +flamegraph +footgun +freeram +fsxattr +fullblock +getfacl +getfattr +getopt +gibi +gibibytes +glob +globbing +hardcode +hardcoded +hardcoding +hardfloat +hardlink +hardlinks +hasher +hwcaps +infile +iflag +iflags +kibi +kibibytes +langinfo +libacl +lcase +listxattr +llistxattr +lossily +lstat +makedev +mebi +mebibytes +mergeable +microbenchmark +microbenchmarks +microbenchmarking +monomorphized +multibyte +multicall +nmerge +noatime +nocache +nocreat +noctty +noerror +noexec +nofollow +nolinks +nonblock +nonportable +nonprinting +nonrepeating +nonseekable +notrunc +nowrite +noxfer +ofile +oflag +oflags +openat +pdeathsig +peekable +performant +prctl +precompiled +precompute +preload +prepend +prepended +primality +pseudoprime +pseudoprimes +quantiles +readonly +ROOTFS +reparse +rposition +seedable +semver +semiprime +semiprimes +setcap +setfacl +setfattr +SETFL +setlocale +shortcode +shortcodes +setpgid +sigaction +CHLD +chld +SIGCHLD +sigchld +siginfo +SIGTTIN +sigttin +SIGTTOU +sigttou +sigusr +strcasecmp +subcommand +subexpression +submodule +sync +symlink +symlinks 
+syscall +syscalls +sysconf +tokenize +toolchain +totalram +truthy +tunables +TUNABLES +ucase +unbuffered +udeps +unescape +unintuitive +unprefixed +unportable +unsync +urand +whitespace +wordlist +wordlists +xattrs +xpass + +# * abbreviations +AMPM +ampm +consts +deps +dev +fdlimit +inacc +maint +proc +procs +TOCTOU + +# * constants +xffff + +# * variables +delim +errno +progname +retval +subdir +val +vals +inval +nofield + +# * clippy +uninlined +nonminimal +rposition + +# * CPU/hardware features +ASIMD +asimd +hwcaps +PCLMUL +pclmul +PCLMULQDQ +pclmulqdq +PMULL +pmull +TUNABLES +tunables +VMULL +vmull +ENOTSUP +enotsup +SETFL +tmpfs + +Hijri +Nowruz +charmap +hijri diff --git a/.vscode/cspell.dictionaries/people.wordlist.txt b/.vscode/cspell.dictionaries/people.wordlist.txt new file mode 100644 index 0000000..987bfb7 --- /dev/null +++ b/.vscode/cspell.dictionaries/people.wordlist.txt @@ -0,0 +1,9 @@ +# this list is unique to diffutils +Gunter Schmidt + Gunter + Schmidt +Sylvestre Ledru + Sylvestre + Ledru + +axodotdev diff --git a/.vscode/cspell.dictionaries/shell.wordlist.txt b/.vscode/cspell.dictionaries/shell.wordlist.txt new file mode 100644 index 0000000..eb5be04 --- /dev/null +++ b/.vscode/cspell.dictionaries/shell.wordlist.txt @@ -0,0 +1,123 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +# * Mac +clonefile + +# * POSIX +TMPDIR +adduser +csh +globstar +inotify +localtime +mksh +mountinfo +mountpoint +mtab +nullglob + +# * Signals +SIGUSR +SIGUSR1 +SIGUSR2 +SIGINT +SIGTERM +SIGKILL +SIGSTOP +SIGCONT +SIGPIPE +SIGALRM +SIGCHLD +passwd +pipefail +popd +ptmx +pushd +setarch +sh +sudo +sudoedit +tcsh +tzselect +urandom +VARNAME +wtmp +zsh + +# * Windows +APPDATA +COMSPEC +HKCU +HKLM +HOMEDRIVE +HOMEPATH +LOCALAPPDATA +PATHEXT +PATHEXT +SYSTEMROOT +USERDOMAIN +USERNAME +USERPROFILE +procmon + +# * `git` +gitattributes +gitignore + +# * `make` (`gmake`) +CURDIR +GNUMAKEFLAGS +GNUMakefile +LIBPATTERNS +MAKECMDGOALS +MAKEFILES 
+MAKEFLAGS +MAKELEVEL +MAKESHELL +SHELLSTATUS +VPATH +abspath +addprefix +addsuffix +endef +findstring +firstword +ifeq +ifneq +lastword +notdir +patsubst + + +# * `npm` +preversion + +# * utilities +cachegrind +chglog +codespell +commitlint +dprint +dtrace +flamegraph +flamegraphs +gcov +gmake +grcov +grep +markdownlint +rerast +rollup +samply +sed +selinuxenabled +sestatus +vdir +wslpath +xargs + +# * directories +sbin +libexec diff --git a/.vscode/cspell.dictionaries/workspace.wordlist.txt b/.vscode/cspell.dictionaries/workspace.wordlist.txt new file mode 100644 index 0000000..d87630e --- /dev/null +++ b/.vscode/cspell.dictionaries/workspace.wordlist.txt @@ -0,0 +1,402 @@ +# * diffutils project +diffutils +sdiff +uudiff + +debuginfo +tabsize + +# *** the following part is a copy of coreutils *** +# * cargo +cdylib +rlib + +# * crates +advapi +advapi32-sys +aho-corasick +backtrace +blake2b_simd + +# * uutils project +uutils +coreutils +uucore +uutests +ucmd +uumain +rlimit +mkfifo +urandom +uchild +ello +bstr +bytecount +byteorder +chacha +chrono +conv +corasick +crossterm +exacl +filetime +formatteriteminfo +fsext +getopts +getrandom +globset +indicatif +itertools +itoa +iuse +langid +lscolors +mdbook +memchr +multifilereader +onig +ouroboros +peekreader +quickcheck +rand_chacha +ringbuffer +rlimit +rstest +smallvec +tempdir +tempfile +termion +termios +termsize +termwidth +textwrap +thiserror +unic +ureq +walkdir +winapi +xattr + +# * rust/rustc +RUSTDOCFLAGS +RUSTFLAGS +clippy +rustc +rustfmt +rustup +rustdoc +# +bitor # BitOr trait function +bitxor # BitXor trait function +concat +fract +powi +println +repr +rfind +struct +structs +substr +splitn +trunc +uninit + +# * uutils +basenc +chcon +chgrp +chmod +chown +chroot +cksum +csplit +dircolors +hashsum +hostid +logname +mkdir +mkfifo +mknod +mktemp +nohup +nproc +numfmt +pathchk +printenv +printf +readlink +realpath +relpath +rmdir +runcon +shuf +sprintf +stdbuf +stty +tsort +uname +unexpand +whoami + +# * 
vars/errno +errno +EACCES +EBADF +EBUSY +EEXIST +EINVAL +ENODATA +ENOENT +ENOSYS +ENOTEMPTY +EOPNOTSUPP +EPERM +EPIPE +EROFS + +# * vars/fcntl +F_GETFL + GETFL +fcntl +vmsplice + +# * vars/libc +COMFOLLOW +EXDEV +FILENO +FTSENT +HOSTSIZE +IDSIZE +IFBLK +IFCHR +IFDIR +IFIFO +IFLNK +IFMT +IFREG +IFSOCK +IRGRP +IROTH +IRUSR +ISDIR +ISGID +ISUID +ISVTX +IWGRP +IWOTH +IWUSR +IXGRP +IXOTH +IXUSR +LINESIZE +NAMESIZE +RTLD_NEXT + RTLD +SIGABRT +SIGINT +SIGKILL +SIGSTOP +SIGTERM +SYS_fdatasync +SYS_syncfs +USERSIZE +accpath +addrinfo +addrlen +blocksize +canonname +chroot +dlsym +execvp +fdatasync +freeaddrinfo +getaddrinfo +getegid +geteuid +getgid +getgrgid +getgrnam +getgrouplist +getgroups +getpwent +getpwnam +getpwuid +getuid +inode +inodes +isatty +lchown +pathlen +setgid +setgroups +settime +setuid +socketpair +socktype +statfs +statp +statvfs +strcmp +strerror +strlen +syncfs +umask +waitpid +wcslen + +# * vars/nix +iovec +unistd + +# * vars/signals +SIGPIPE + +# * vars/std +CString +pathbuf + +# * vars/stat +bavail +bfree +bsize +ffree +frsize +fsid +fstat +fstype +namelen +# unix::fs::MetadataExt +atime # access time +blksize # blocksize for file system I/O +blocks # number of blocks allocated to file +ctime # creation time +dev # ID of device containing the file +gid # group ID of file owner +ino # inode number +mode # permissions +mtime # modification time +nlink # number of hard links to file +rdev # device ID if file is a character/block special file +size # total size of file in bytes +uid # user ID of file owner +nsec # nanosecond measurement scale +# freebsd::MetadataExt +iosize + +# * vars/time +Timespec +isdst +nanos +nsec +nsecs +strftime +strptime +subsec +usec +usecs +utcoff + +# * vars/utmpx +endutxent +getutxent +getutxid +getutxline +pututxline +setutxent +utmp +utmpx +utmpxname + +# * vars/winapi +DWORD +SYSTEMTIME +LPVOID +LPWSTR +ULONG +ULONGLONG +UNLEN +WCHAR +WSADATA +errhandlingapi +fileapi +handleapi +lmcons +minwinbase +minwindef 
+processthreadsapi +synchapi +sysinfoapi +winbase +winerror +winnt +winsock + +# * vars/selinux +freecon +getfilecon +lgetfilecon +lsetfilecon +restorecon +setfilecon + +# * vars/uucore +optflag +optflagmulti +optflagopt +optmulti +optopt + +# * uutils +ccmd +coreopts +coreutils +keepenv +libc +libstdbuf +musl +tmpd +uchild +ucmd +ucommand +utmpx +uucore +uucore_procs +uudoc +uufuzz +uumain +uutil +uutests +uutils + +# * function names +getcwd + +# * other +weblate +algs + +# * stty terminal flags +brkint +cstopb +decctlq +echoctl +echoe +echoke +ignbrk +ignpar +icrnl +isig +istrip +litout +opost +parodd +ENOTTY + +# translation tests +CLICOLOR +erreur +Utilisation +merror +merreur +verbo +inattendu diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs index e506b3f..4640e76 100644 --- a/benches/bench-diffutils.rs +++ b/benches/bench-diffutils.rs @@ -9,8 +9,11 @@ //! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ //! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. +// clippy analyzes wrongly +#![allow(dead_code)] + /// Generate test files with these sizes in KB. 
-const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, MB, 10 * MB, 25 * MB]; // const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB]; // Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files const TEMP_DIR: &str = ""; @@ -138,7 +141,7 @@ mod parser { #[divan::bench] fn cmp_parser(bencher: Bencher) { let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - let args = str_to_options(&cmd).into_iter().peekable(); + let args = str_to_options(cmd).into_iter().peekable(); bencher .with_inputs(|| args.clone()) .bench_values(|data| black_box(cmp::parse_params(data))); @@ -156,7 +159,7 @@ mod parser { #[divan::bench] fn diff_parser(bencher: Bencher) { let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; - let args = str_to_options(&cmd).into_iter().peekable(); + let args = str_to_options(cmd).into_iter().peekable(); bencher .with_inputs(|| args.clone()) .bench_values(|data| black_box(params::parse_params(data))); @@ -247,9 +250,8 @@ mod prepare { pub fn str_to_options(opt: &str) -> Vec<OsString> { let s: Vec<OsString> = opt .split(" ") - .into_iter() .filter(|s| !s.is_empty()) - .map(|s| OsString::from(s)) + .map(OsString::from) .collect(); s @@ -337,7 +339,7 @@ mod prepare { } // create last line - let missing = (bytes - n_lines as u64 * LINE_LENGTH as u64) as usize; + let missing = (bytes - n_lines * LINE_LENGTH as u64) as usize; if missing > 0 { for word_idx in 0..10 { let start = word_idx * 6; // Each word + space block is 6 bytes diff --git a/src/cmp.rs b/src/cmp.rs index 587d5cc..1e8d4c3 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code.
+// spell-checker:ignore ilog + use crate::utils::format_failure_to_read_input_file; use std::env::{self, ArgsOs}; use std::ffi::OsString; @@ -233,7 +235,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu } // Do as GNU cmp, and completely disable printing if we are - // outputing to /dev/null. + // outputting to /dev/null. #[cfg(not(target_os = "windows"))] if is_stdout_dev_null() { params.quiet = true; diff --git a/src/context_diff.rs b/src/context_diff.rs index 873fc3d..21a9eeb 100644 --- a/src/context_diff.rs +++ b/src/context_diff.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +// spell-checker:ignore alef alefr alefx betr betx nodiff + use std::collections::VecDeque; use std::io::Write; diff --git a/src/ed_diff.rs b/src/ed_diff.rs index b8cdbc5..bff73b0 100644 --- a/src/ed_diff.rs +++ b/src/ed_diff.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +// spell-checker:ignore alef alefr betr nodiff + use std::io::Write; use crate::params::Params; diff --git a/src/macros.rs b/src/macros.rs index 90a4eaa..9354964 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,5 +1,12 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code.
+ +// spell-checker:ignore replacen + // asserts equality of the actual diff and expected diff -// considering datetime varitations +// considering datetime variations // // It replaces the modification time in the actual diff // with placeholder "TIMESTAMP" and then asserts the equality diff --git a/src/normal_diff.rs b/src/normal_diff.rs index 002cd01..359f829 100644 --- a/src/normal_diff.rs +++ b/src/normal_diff.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +// spell-checker:ignore alef alefn alefr betn betr nodiff + use std::io::Write; use crate::params::Params; diff --git a/src/params.rs b/src/params.rs index 74ef3e3..f205226 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,3 +1,10 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +// spell-checker:ignore numvalue + use std::ffi::OsString; use std::iter::Peekable; use std::path::PathBuf; diff --git a/src/side_diff.rs b/src/side_diff.rs index 56953d2..c64c0e9 100644 --- a/src/side_diff.rs +++ b/src/side_diff.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. +// spell-checker:ignore áéíóú endiand mcel rxyz + use core::cmp::{max, min}; use diff::Result; use std::{io::Write, vec}; diff --git a/src/unified_diff.rs b/src/unified_diff.rs index 0f504a8..b1eda54 100644 --- a/src/unified_diff.rs +++ b/src/unified_diff.rs @@ -3,6 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. 
+// spell-checker:ignore alef alefn alefr alefx betn betr betx nodiff + use std::collections::VecDeque; use std::io::Write; diff --git a/src/utils.rs b/src/utils.rs index daca18d..a22aec5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -13,11 +13,11 @@ use unicode_width::UnicodeWidthStr; #[must_use] pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec<u8> { let tab = b'\t'; - let ntabs = line.iter().filter(|c| **c == tab).count(); - if ntabs == 0 { + let n_tabs = line.iter().filter(|c| **c == tab).count(); + if n_tabs == 0 { return line.to_vec(); } - let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut result = Vec::with_capacity(line.len() + n_tabs * (tabsize - 1)); let mut offset = 0; let mut iter = line.split(|c| *c == tab).peekable();