Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ unicode-width = "0.2.0"
pretty_assertions = "1.4.0"
assert_cmd = "2.0.14"
predicates = "3.1.0"
tempfile = "3.10.1"
tempfile = "3.26.0"

[profile.release]
lto = "thin"
Expand All @@ -40,3 +40,9 @@ panic = "abort"
[profile.dist]
inherits = "release"
lto = "thin"

[features]
# default = ["cmp_bytes_limit_128_bit"]
# allows to set the --bytes limit from u64 to u128, if limits larger than Exabyte are required.
cmp_bytes_limit_128_bit = []

101 changes: 54 additions & 47 deletions src/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,23 @@ use std::os::unix::fs::MetadataExt;
#[cfg(target_os = "windows")]
use std::os::windows::fs::MetadataExt;

/// for --bytes, so really large number limits can be expressed, like 1Y.
#[cfg(not(feature = "cmp_bytes_limit_128_bit"))]
pub type Bytes = u64;
#[cfg(feature = "cmp_bytes_limit_128_bit")]
pub type Bytes = u128;
// ignore initial is currently limited to u64, as take(skip) is used.
pub type IgnInit = u64;

#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Params {
executable: OsString,
from: OsString,
to: OsString,
print_bytes: bool,
skip_a: Option<usize>,
skip_b: Option<usize>,
max_bytes: Option<usize>,
skip_a: Option<IgnInit>,
skip_b: Option<IgnInit>,
max_bytes: Option<Bytes>,
verbose: bool,
quiet: bool,
}
Expand Down Expand Up @@ -66,13 +74,13 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
};
let executable_str = executable.to_string_lossy().to_string();

let parse_skip = |param: &str, skip_desc: &str| -> Result<usize, String> {
let parse_skip = |param: &str, skip_desc: &str| -> Result<IgnInit, String> {
let suffix_start = param
.find(|b: char| !b.is_ascii_digit())
.unwrap_or(param.len());
let mut num = match param[..suffix_start].parse::<usize>() {
let mut num = match param[..suffix_start].parse::<IgnInit>() {
Ok(num) => num,
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => IgnInit::MAX,
Err(_) => {
return Err(format!(
"{executable_str}: invalid --ignore-initial value '{skip_desc}'"
Expand All @@ -83,33 +91,24 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
if suffix_start != param.len() {
// Note that GNU cmp advertises supporting up to Y, but fails if you try
// to actually use anything beyond E.
let multiplier: usize = match &param[suffix_start..] {
let multiplier: IgnInit = match &param[suffix_start..] {
"kB" => 1_000,
"K" => 1_024,
"MB" => 1_000_000,
"M" => 1_048_576,
"GB" => 1_000_000_000,
"G" => 1_073_741_824,
// This only generates a warning when compiling for target_pointer_width < 64
#[allow(unused_variables)]
suffix @ ("TB" | "T" | "PB" | "P" | "EB" | "E") => {
#[cfg(target_pointer_width = "64")]
match suffix {
"TB" => 1_000_000_000_000,
"T" => 1_099_511_627_776,
"PB" => 1_000_000_000_000_000,
"P" => 1_125_899_906_842_624,
"EB" => 1_000_000_000_000_000_000,
"E" => 1_152_921_504_606_846_976,
_ => unreachable!(),
}
#[cfg(not(target_pointer_width = "64"))]
usize::MAX
}
"ZB" => usize::MAX, // 1_000_000_000_000_000_000_000,
"Z" => usize::MAX, // 1_180_591_620_717_411_303_424,
"YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000,
"Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176,
"TB" => 1_000_000_000_000,
"T" => 1_099_511_627_776,
"PB" => 1_000_000_000_000_000,
"P" => 1_125_899_906_842_624,
"EB" => 1_000_000_000_000_000_000,
"E" => 1_152_921_504_606_846_976,
// TODO setting usize:MAX does not mimic GNU cmp behavior, it should be an error.
"ZB" => IgnInit::MAX, // 1_000_000_000_000_000_000_000,
"Z" => IgnInit::MAX, // 1_180_591_620_717_411_303_424,
"YB" => IgnInit::MAX, // 1_000_000_000_000_000_000_000_000,
"Y" => IgnInit::MAX, // 1_208_925_819_614_629_174_706_176,
_ => {
return Err(format!(
"{executable_str}: invalid --ignore-initial value '{skip_desc}'"
Expand All @@ -119,7 +118,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu

num = match num.overflowing_mul(multiplier) {
(n, false) => n,
_ => usize::MAX,
_ => IgnInit::MAX,
}
}

Expand Down Expand Up @@ -173,9 +172,10 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
let (_, arg) = param_str.split_once('=').unwrap();
arg.to_string()
};
let max_bytes = match max_bytes.parse::<usize>() {
let max_bytes = match max_bytes.parse::<Bytes>() {
Ok(num) => num,
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX,
// TODO limit to MAX is dangerous, this should become an error like in GNU cmp.
Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => Bytes::MAX,
Err(_) => {
return Err(format!(
"{executable_str}: invalid --bytes value '{max_bytes}'"
Expand Down Expand Up @@ -233,7 +233,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu
}

// Do as GNU cmp, and completely disable printing if we are
// outputing to /dev/null.
// outputting to /dev/null.
#[cfg(not(target_os = "windows"))]
if is_stdout_dev_null() {
params.quiet = true;
Expand Down Expand Up @@ -285,7 +285,7 @@ pub fn parse_params<I: Iterator<Item = OsString>>(mut opts: Peekable<I>) -> Resu

fn prepare_reader(
path: &OsString,
skip: &Option<usize>,
skip: &Option<IgnInit>,
params: &Params,
) -> Result<Box<dyn BufRead>, String> {
let mut reader: Box<dyn BufRead> = if path == "-" {
Expand All @@ -304,6 +304,8 @@ fn prepare_reader(
};

if let Some(skip) = skip {
// cast as u64 must remain, because value of IgnInit data type could be changed.
#[allow(clippy::unnecessary_cast)]
if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) {
return Err(format_failure_to_read_input_file(
&params.executable,
Expand All @@ -326,7 +328,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
let mut from = prepare_reader(&params.from, &params.skip_a, params)?;
let mut to = prepare_reader(&params.to, &params.skip_b, params)?;

let mut offset_width = params.max_bytes.unwrap_or(usize::MAX);
let mut offset_width = params.max_bytes.unwrap_or(Bytes::MAX);

if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(&params.from), fs::metadata(&params.to)) {
#[cfg(not(target_os = "windows"))]
Expand All @@ -341,7 +343,7 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
return Ok(Cmp::Different);
}

let smaller = cmp::min(a_size, b_size) as usize;
let smaller = cmp::min(a_size, b_size) as Bytes;
offset_width = cmp::min(smaller, offset_width);
}

Expand All @@ -350,8 +352,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
// Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces
let mut output = Vec::<u8>::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4);

let mut at_byte = 1;
let mut at_line = 1;
let mut at_byte: Bytes = 1;
let mut at_line: u64 = 1;
let mut start_of_line = true;
let mut stdout = BufWriter::new(io::stdout().lock());
let mut compare = Cmp::Equal;
Expand Down Expand Up @@ -401,8 +403,8 @@ pub fn cmp(params: &Params) -> Result<Cmp, String> {
if from_buf[..consumed] == to_buf[..consumed] {
let last = from_buf[..consumed].last().unwrap();

at_byte += consumed;
at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count();
at_byte += consumed as Bytes;
at_line += (from_buf[..consumed].iter().filter(|&c| *c == b'\n').count()) as u64;

start_of_line = *last == b'\n';

Expand Down Expand Up @@ -590,7 +592,7 @@ fn format_visible_byte(byte: u8) -> String {
fn format_verbose_difference(
from_byte: u8,
to_byte: u8,
at_byte: usize,
at_byte: Bytes,
offset_width: usize,
output: &mut Vec<u8>,
params: &Params,
Expand Down Expand Up @@ -655,7 +657,7 @@ fn format_verbose_difference(
}

#[inline]
fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) {
fn report_eof(at_byte: Bytes, at_line: u64, start_of_line: bool, eof_on: &str, params: &Params) {
if params.quiet {
return;
}
Expand Down Expand Up @@ -707,7 +709,7 @@ fn is_posix_locale() -> bool {
}

#[inline]
fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) {
fn report_difference(from_byte: u8, to_byte: u8, at_byte: Bytes, at_line: u64, params: &Params) {
if params.quiet {
return;
}
Expand Down Expand Up @@ -804,7 +806,7 @@ mod tests {
from: os("foo"),
to: os("bar"),
skip_a: Some(1),
skip_b: Some(usize::MAX),
skip_b: Some(IgnInit::MAX),
..Default::default()
}),
parse_params(
Expand Down Expand Up @@ -982,7 +984,7 @@ mod tests {
executable: os("cmp"),
from: os("foo"),
to: os("bar"),
max_bytes: Some(usize::MAX),
max_bytes: Some(Bytes::MAX),
..Default::default()
}),
parse_params(
Expand All @@ -999,6 +1001,7 @@ mod tests {
);

// Failure case
// TODO This is actually fine in GNU cmp. --bytes does not have a unit parser yet.
assert_eq!(
Err("cmp: invalid --bytes value '1K'".to_string()),
parse_params(
Expand Down Expand Up @@ -1044,8 +1047,8 @@ mod tests {
executable: os("cmp"),
from: os("foo"),
to: os("bar"),
skip_a: Some(usize::MAX),
skip_b: Some(usize::MAX),
skip_a: Some(IgnInit::MAX),
skip_b: Some(IgnInit::MAX),
..Default::default()
}),
parse_params(
Expand Down Expand Up @@ -1116,8 +1119,12 @@ mod tests {
.enumerate()
{
let values = [
1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX),
1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX),
(1_000 as IgnInit)
.checked_pow((i + 1) as u32)
.unwrap_or(IgnInit::MAX),
(1024 as IgnInit)
.checked_pow((i + 1) as u32)
.unwrap_or(IgnInit::MAX),
];
for (j, v) in values.iter().enumerate() {
assert_eq!(
Expand Down
Loading