Skip to content

Commit 28a0d5f

Browse files
committed
du: parse blocksize and apparent-size using uutils-args
Ideally, we should replace *all* of the clap-based argument parsing with uutils-args in one go, but that is a monumental task. Let's do it one step at a time instead.
1 parent a0fe6d3 commit 28a0d5f

File tree

4 files changed

+248
-80
lines changed

4 files changed

+248
-80
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/du/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ chrono = { workspace = true }
2323
glob = { workspace = true }
2424
clap = { workspace = true }
2525
uucore = { workspace = true, features = ["format", "parser"] }
26+
uutils-args = { workspace = true }
2627
thiserror = { workspace = true }
2728

2829
[target.'cfg(target_os = "windows")'.dependencies]

src/uu/du/src/du.rs

Lines changed: 170 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6+
// spell-checker:ignore (terms) Kibi Giga Tera Tebi Peta Pebi Exbi
7+
68
use chrono::{DateTime, Local};
79
use clap::{Arg, ArgAction, ArgMatches, Command, builder::PossibleValue};
810
use glob::Pattern;
911
use std::collections::HashSet;
1012
use std::env;
13+
use std::ffi::{OsStr, OsString};
1114
#[cfg(not(windows))]
1215
use std::fs::Metadata;
1316
use std::fs::{self, DirEntry, File};
@@ -31,6 +34,7 @@ use uucore::parser::parse_glob;
3134
use uucore::parser::parse_size::{ParseSizeError, parse_size_u64};
3235
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
3336
use uucore::{format_usage, help_about, help_section, help_usage, show, show_error, show_warning};
37+
use uutils_args::{Arguments, Options, Value, ValueResult};
3438
#[cfg(windows)]
3539
use windows_sys::Win32::Foundation::HANDLE;
3640
#[cfg(windows)]
@@ -74,6 +78,112 @@ const ABOUT: &str = help_about!("du.md");
7478
const AFTER_HELP: &str = help_section!("after help", "du.md");
7579
const USAGE: &str = help_usage!("du.md");
7680

81+
#[derive(Clone, Debug, Eq, PartialEq)]
82+
#[allow(dead_code)]
83+
enum SizeUnit {
84+
Kilo,
85+
Kibi,
86+
Mega,
87+
Mebi,
88+
Giga,
89+
Gibi,
90+
Tera,
91+
Tebi,
92+
Peta,
93+
Pebi,
94+
Exa,
95+
Exbi,
96+
// GNU du does not recognize higher prefixes, so we shouldn't either.
97+
}
98+
99+
#[derive(Arguments)]
100+
enum NewArg {
101+
#[arg(
102+
"-0",
103+
"-a",
104+
"--all",
105+
"-c",
106+
"--count-links",
107+
"-D",
108+
"--dereference",
109+
"--dereference-args",
110+
"-H",
111+
"--inodes",
112+
"-l",
113+
"-L",
114+
"--no-dereference",
115+
"--one-file-system",
116+
"-P",
117+
"-s",
118+
"-S",
119+
"--separate-dirs",
120+
"--summarize",
121+
"--verbose",
122+
"-x"
123+
)]
124+
Ignore,
125+
126+
#[arg(
127+
"--exclude-from=X",
128+
"--exclude=X",
129+
"--files0-from=X",
130+
"--threshold=X",
131+
"--time-style=X"
132+
)]
133+
IgnoreArg(#[allow(dead_code)] OsString),
134+
135+
#[arg("--time[=X]", "-d[X]")]
136+
IgnoreOptionArg(#[allow(dead_code)] Option<OsString>),
137+
138+
#[arg("--apparent-size")]
139+
ApparentSize,
140+
141+
#[arg("-B SIZE", "--block-size=SIZE")]
142+
BlockSize(SizeFormat),
143+
144+
#[arg("-h", "--human-readable")]
145+
HumanBinary,
146+
147+
#[arg("--si")]
148+
HumanDecimal,
149+
150+
#[arg("-b", "--bytes")]
151+
Bytes,
152+
153+
#[arg("-k")]
154+
KibiBytes,
155+
156+
#[arg("-m")]
157+
MebiBytes,
158+
}
159+
160+
#[derive(Debug, Default, PartialEq, Eq)]
161+
struct Settings {
162+
apparent_size: bool,
163+
size_format: SizeFormat,
164+
}
165+
166+
impl Options<NewArg> for Settings {
167+
fn apply(&mut self, arg: NewArg) -> Result<(), uutils_args::Error> {
168+
match arg {
169+
NewArg::ApparentSize => self.apparent_size = true,
170+
NewArg::BlockSize(size_format) => self.size_format = size_format,
171+
NewArg::Bytes => {
172+
self.apparent_size = true;
173+
self.size_format = SizeFormat::BlockSize(1);
174+
}
175+
NewArg::HumanBinary => self.size_format = SizeFormat::HumanBinary,
176+
NewArg::HumanDecimal => self.size_format = SizeFormat::HumanDecimal,
177+
NewArg::KibiBytes => self.size_format = SizeFormat::BlockSize(1024),
178+
NewArg::MebiBytes => self.size_format = SizeFormat::BlockSize(1024 * 1024),
179+
NewArg::Ignore => {}
180+
NewArg::IgnoreArg(_) => {}
181+
NewArg::IgnoreOptionArg(_) => {}
182+
}
183+
Ok(())
184+
}
185+
}
186+
77187
struct TraversalOptions {
78188
all: bool,
79189
separate_dirs: bool,
@@ -111,11 +221,54 @@ enum Time {
111221
Created,
112222
}
113223

114-
#[derive(Clone)]
224+
#[derive(Clone, Debug, Eq, PartialEq)]
115225
enum SizeFormat {
116226
HumanDecimal,
117227
HumanBinary,
118228
BlockSize(u64),
229+
#[allow(dead_code)]
230+
Unit(SizeUnit),
231+
}
232+
233+
impl SizeFormat {
234+
fn parse(s: &OsStr, _lenient: bool) -> ValueResult<SizeFormat> {
235+
// FIXME: Must have a custom parser, since "Unit" and "Divisor" distinction is lost by parse_size_u64!
236+
// FIXME: Must be more lenient when parsing envvars, i.e. allow trailing (potentially non-UTF-8) garbage!
237+
if let Some(s) = s.to_str() {
238+
let bytes = parse_size_u64(s)?;
239+
// FIXME: Use USimpleError maybe?
240+
if bytes == 0 {
241+
Err("".into()) // FIXME: Error messages are ignored?!
242+
} else {
243+
Ok(SizeFormat::BlockSize(bytes))
244+
}
245+
} else {
246+
Err("".into()) // FIXME: Error messages are ignored?!
247+
}
248+
}
249+
}
250+
251+
impl Default for SizeFormat {
252+
fn default() -> SizeFormat {
253+
for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] {
254+
if let Some(env_size) = env::var_os(env_var) {
255+
if let Ok(v) = SizeFormat::parse(&env_size, true) {
256+
return v;
257+
}
258+
}
259+
}
260+
if env::var("POSIXLY_CORRECT").is_ok() {
261+
SizeFormat::BlockSize(512)
262+
} else {
263+
SizeFormat::BlockSize(1024)
264+
}
265+
}
266+
}
267+
268+
impl Value for SizeFormat {
269+
fn from_value(s: &OsStr) -> ValueResult<SizeFormat> {
270+
SizeFormat::parse(s, false)
271+
}
119272
}
120273

121274
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
@@ -280,26 +433,6 @@ fn get_file_info(path: &Path) -> Option<FileInfo> {
280433
result
281434
}
282435

283-
fn read_block_size(s: Option<&str>) -> UResult<u64> {
284-
if let Some(s) = s {
285-
parse_size_u64(s)
286-
.map_err(|e| USimpleError::new(1, format_error_message(&e, s, options::BLOCK_SIZE)))
287-
} else {
288-
for env_var in ["DU_BLOCK_SIZE", "BLOCK_SIZE", "BLOCKSIZE"] {
289-
if let Ok(env_size) = env::var(env_var) {
290-
if let Ok(v) = parse_size_u64(&env_size) {
291-
return Ok(v);
292-
}
293-
}
294-
}
295-
if env::var("POSIXLY_CORRECT").is_ok() {
296-
Ok(512)
297-
} else {
298-
Ok(1024)
299-
}
300-
}
301-
}
302-
303436
// this takes `my_stat` to avoid having to stat files multiple times.
304437
#[allow(clippy::cognitive_complexity)]
305438
fn du(
@@ -549,6 +682,7 @@ impl StatPrinter {
549682
size.div_ceil(block_size).to_string()
550683
}
551684
}
685+
SizeFormat::Unit(_) => unimplemented!(), // FIXME
552686
}
553687
}
554688

@@ -618,7 +752,12 @@ fn read_files_from(file_name: &str) -> Result<Vec<PathBuf>, std::io::Error> {
618752
#[uucore::main]
619753
#[allow(clippy::cognitive_complexity)]
620754
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
621-
let matches = uu_app().try_get_matches_from(args)?;
755+
let raw_args = args.collect::<Vec<_>>();
756+
let (settings, _operands) = Settings::default()
757+
.parse(&raw_args)
758+
// FIXME: Leads to ugly errors
759+
.map_err(|e| std::io::Error::other(format!("{e}").trim_start_matches("error: ")))?;
760+
let matches = uu_app().try_get_matches_from(raw_args.iter())?;
622761

623762
let summarize = matches.get_flag(options::SUMMARIZE);
624763

@@ -665,29 +804,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
665804
}
666805
});
667806

668-
let size_format = if matches.get_flag(options::HUMAN_READABLE) {
669-
SizeFormat::HumanBinary
670-
} else if matches.get_flag(options::SI) {
671-
SizeFormat::HumanDecimal
672-
} else if matches.get_flag(options::BYTES) {
673-
SizeFormat::BlockSize(1)
674-
} else if matches.get_flag(options::BLOCK_SIZE_1K) {
675-
SizeFormat::BlockSize(1024)
676-
} else if matches.get_flag(options::BLOCK_SIZE_1M) {
677-
SizeFormat::BlockSize(1024 * 1024)
678-
} else {
679-
let block_size_str = matches.get_one::<String>(options::BLOCK_SIZE);
680-
let block_size = read_block_size(block_size_str.map(AsRef::as_ref))?;
681-
if block_size == 0 {
682-
return Err(std::io::Error::other(format!(
683-
"invalid --{} argument {}",
684-
options::BLOCK_SIZE,
685-
block_size_str.map_or("???BUG", |v| v).quote()
686-
))
687-
.into());
688-
}
689-
SizeFormat::BlockSize(block_size)
690-
};
807+
let size_format = settings.size_format;
691808

692809
let traversal_options = TraversalOptions {
693810
all: matches.get_flag(options::ALL),
@@ -726,7 +843,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
726843
})
727844
})
728845
.transpose()?,
729-
apparent_size: matches.get_flag(options::APPARENT_SIZE) || matches.get_flag(options::BYTES),
846+
apparent_size: settings.apparent_size,
730847
time,
731848
time_format,
732849
line_ending: LineEnding::from_zero_flag(matches.get_flag(options::NULL)),
@@ -1110,9 +1227,13 @@ mod test_du {
11101227

11111228
#[test]
11121229
fn test_read_block_size() {
1113-
let test_data = [Some("1024".to_string()), Some("K".to_string()), None];
1114-
for it in &test_data {
1115-
assert!(matches!(read_block_size(it.as_deref()), Ok(1024)));
1230+
for (input, expected) in [
1231+
("1024", SizeFormat::BlockSize(1024)),
1232+
("1K", SizeFormat::BlockSize(1024)),
1233+
// FIXME: data loss! Should also return whether a suffix was used, see test_du_blocksize_multiplier and #7738
1234+
// ("K", SizeFormat::Unit(SizeUnit::Kibi)),
1235+
] {
1236+
assert_eq!(expected, SizeFormat::from_value(OsStr::new(input)).unwrap());
11161237
}
11171238
}
11181239
}

0 commit comments

Comments
 (0)