diff --git a/Cargo.lock b/Cargo.lock index 06355f0..5c4d46e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,6 +146,62 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "either" version = "1.15.0" @@ -217,6 +273,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jwalk" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2735847566356cd2179a2a38264839308f7079fa96e6bd5a42d740460e003c56" +dependencies = [ + "crossbeam", + "rayon", +] + [[package]] name = "libc" version = "0.2.180" @@ -266,20 +332,31 @@ dependencies = [ ] [[package]] -name = "rustversion" -version = "1.0.22" +name = "rayon" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] [[package]] -name = "same-file" -version = "1.0.6" +name = "rayon-core" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ - "winapi-util", + "crossbeam-deque", + "crossbeam-utils", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "strsim" version = "0.11.1" @@ -295,7 +372,8 @@ dependencies = [ "indicatif", "infer", "itertools", - "walkdir", + "jwalk", + "rayon", ] [[package]] @@ -337,16 +415,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "wasm-bindgen" version = "0.2.106" @@ -402,15 +470,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys 0.61.2", -] - [[package]] name = "windows-link" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 9f7c3c0..d0673ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,8 @@ clap = { version = "4.5", features = ["derive"] } indicatif = "0.17" infer = "0.19" itertools = "0.14.0" -walkdir = "2.5.0" +jwalk = "0.8" +rayon = "1" [lints.clippy] unwrap_used = "deny" diff --git a/src/main.rs b/src/main.rs index c45fbc8..6b54faf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ +use jwalk::WalkDir; +use rayon::prelude::*; use std::fs::File; use std::io::Read; use std::{collections::BTreeMap, path::PathBuf}; -use walkdir::WalkDir; use anyhow::{Context, Result}; use clap::Parser; @@ -134,19 +135,17 @@ fn detect_mimetype(path: &std::path::Path) -> Result { Ok("application/octet-stream".to_string()) } -fn process_entry(entry: &walkdir::DirEntry, report: &mut Report) -> Result<()> { - let ext = entry - .path() +fn process_entry(path: &std::path::Path, report: &mut Report) -> Result<()> { + let ext = path .extension() .unwrap_or_default() .to_owned() .into_string() .unwrap_or_default(); - let metadata = entry - .path() + let metadata = path .metadata() - .with_context(|| format!("failed to read metadata for {:?}", entry.path()))?; + .with_context(|| format!("failed to read metadata for {:?}", path))?; report.size += metadata.len(); report @@ -155,8 +154,8 @@ fn process_entry(entry: &walkdir::DirEntry, report: &mut Report) -> Result<()> { .and_modify(|e| *e += 1) .or_insert(1); - let mimetype = detect_mimetype(entry.path()) - .with_context(|| format!("failed to detect mimetype for {:?}", entry.path()))?; + let mimetype = detect_mimetype(path) + .with_context(|| format!("failed to detect mimetype for {:?}", path))?; report .mimetypes @@ -167,9 +166,26 @@ fn process_entry(entry: &walkdir::DirEntry, report: &mut Report) -> Result<()> { Ok(()) } -fn scan(target: PathBuf, progress_bar: bool) -> Report { - let mut report = Report::default(); +fn merge_reports(mut a: Report, b: Report) -> Report { + for (ext, count) in b.extensions { + a.extensions + .entry(ext) + .and_modify(|e| *e += count) + .or_insert(count); + } + for (mime, count) in b.mimetypes { + a.mimetypes + .entry(mime) + .and_modify(|e| *e += count) + .or_insert(count); + } + a.folders.extend(b.folders); + a.size += b.size; + a.errors.extend(b.errors); + a +} +fn scan(target: PathBuf, progress_bar: bool) -> Report { let pb = if progress_bar { let progress = ProgressBar::new_spinner(); progress.set_style( @@ -183,34 +199,47 @@ fn scan(target: PathBuf, progress_bar: bool) -> Report { None }; - for entry in WalkDir::new(target).into_iter().skip(1) { - match entry { - Ok(entry) => { - if entry.path().is_dir() { - report.folders.push(entry.path().to_path_buf()); - } else { - if let Some(ref progress) = pb { - progress.set_message(format!("Processing: {}", entry.path().display())); - progress.tick(); - } - - if let Err(e) = process_entry(&entry, &mut report) { - report.errors.push(ScanError { - path: entry.path().to_path_buf(), - message: e.to_string(), + let pb_fold = pb.clone(); + let report = WalkDir::new(target) + .into_iter() + .skip(1) + .par_bridge() + .fold(Report::default, move |mut partial, entry| { + match entry { + Ok(entry) => { + let path = entry.path(); + if let Some(err) = entry.read_children_error { + partial.errors.push(ScanError { + path, + message: format!("failed to read directory: {err}"), }); + return partial; + } + if entry.file_type().is_dir() { + partial.folders.push(path); + } else { + if let Some(ref pb) = pb_fold { + pb.tick(); + } + if let Err(e) = process_entry(&path, &mut partial) { + partial.errors.push(ScanError { + path, + message: e.to_string(), + }); + } } } + Err(e) => { + let path = e.path().map(|p| p.to_path_buf()).unwrap_or_default(); + partial.errors.push(ScanError { + path, + message: format!("failed to read entry: {e}"), + }); + } } - Err(e) => { - let path = e.path().map(|p| p.to_path_buf()).unwrap_or_default(); - report.errors.push(ScanError { - path, - message: format!("failed to read entry: {e}"), - }); - } - } - } + partial + }) + .reduce(Report::default, merge_reports); if let Some(progress) = pb { progress.finish_with_message(format!("Completed with {} errors", report.errors.len()));