xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

commit 4cf2522577bdb1a5967e5cf2f424d2b1c4963f6a
parent db1e86e14fca2143cca15c0b9afc3af4011f1ea2
Author: Erik Loualiche <[email protected]>
Date:   Fri, 13 Mar 2026 18:02:43 -0500

refactor: restructure to lib + two binaries (xlcat, xlset)

Move shared modules (formatter, metadata, reader) into lib.rs so both
xlcat and xlset binaries can reuse them. Add stub cell.rs and writer.rs
modules for upcoming xlset feature. Add umya-spreadsheet dependency.

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>

Diffstat:
M Cargo.lock | 349 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M Cargo.toml | 15 ++++++++++++++-
A src/bin/xlcat.rs | 362 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/bin/xlset.rs | 4 ++++
A src/cell.rs | 1 +
A src/lib.rs | 5 +++++
D src/main.rs | 362 -------------------------------------------------------------------------------
A src/writer.rs | 1 +
8 files changed, 730 insertions(+), 369 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -9,6 +9,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] name = "ahash" version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -210,12 +221,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + +[[package]] name = "bstr" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -277,7 +321,7 @@ dependencies = [ "codepage", 
"encoding_rs", "log", - "quick-xml", + "quick-xml 0.31.0", "serde", "zip", ] @@ -292,6 +336,15 @@ dependencies = [ ] [[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + +[[package]] name = "cc" version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -304,6 +357,17 @@ dependencies = [ ] [[package]] +name = "cfb" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a4f8e55be323b378facfcf1f06aa97f6ec17cf4ac84fb17325093aaf62da41" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + +[[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -331,6 +395,16 @@ dependencies = [ ] [[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] name = "clap" version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -418,6 +492,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] name = "crc32fast" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -493,6 +576,16 @@ dependencies = [ ] [[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" 
+dependencies = [ + "generic-array", + "typenum", +] + +[[package]] name = "debug_unsafe" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -516,6 +609,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] name = "displaydoc" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -527,6 +631,12 @@ dependencies = [ ] [[package]] +name = "doc-comment" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "780955b8b195a21ab8e4ac6b60dd1dbdcec1dc6c51c0617964b08c81785e12c9" + +[[package]] name = "document-features" version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -597,6 +707,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + +[[package]] name = "fast-float2" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -634,6 +755,12 @@ dependencies = [ ] [[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -728,6 
+855,16 @@ dependencies = [ ] [[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] name = "getrandom" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -815,6 +952,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] name = "home" version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -824,6 +970,21 @@ dependencies = [ ] [[package]] +name = "html_parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f56db07b6612644f6f7719f8ef944f75fff9d6378fdf3d316fd32194184abd" +dependencies = [ + "doc-comment", + "pest", + "pest_derive", + "serde", + "serde_derive", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] name = "iana-time-zone" version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -854,6 +1015,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" [[package]] +name = "imagesize" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09e54e57b4c48b40f7aec75635392b12b3421fa26fe8b4332e63138ed278459c" + +[[package]] name = "indexmap" version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -866,6 +1033,16 @@ dependencies = [ ] [[package]] +name = "inout" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + +[[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -898,6 +1075,12 @@ dependencies = [ ] [[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] name = "leb128fmt" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -962,6 +1145,16 @@ dependencies = [ ] [[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1082,6 +1275,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + +[[package]] name = "phf" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1233,7 +1469,7 @@ dependencies = [ "rayon", "regex", "strum_macros", - "thiserror", + "thiserror 2.0.18", "version_check", "xxhash-rust", ] @@ -1247,7 +1483,7 @@ dependencies = [ "polars-arrow-format", "regex", "simdutf8", - "thiserror", + "thiserror 2.0.18", ] [[package]] @@ -1685,6 +1921,16 @@ dependencies = [ ] [[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + +[[package]] name = "quote" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1922,6 +2168,17 @@ dependencies = [ ] [[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2045,6 +2302,12 @@ dependencies = [ ] [[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] name = "syn" version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2088,12 +2351,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" [[package]] +name = 
"thin-vec" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "144f754d318415ac792f9d69fc87abbbfc043ce2ef041c60f16ad828f638717d" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -2108,6 +2397,12 @@ dependencies = [ ] [[package]] +name = "thousands" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + +[[package]] name = "tinyvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2150,6 +2445,47 @@ dependencies = [ ] [[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + +[[package]] +name = "umya-spreadsheet" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "408c7e039c96ec1d517a1111ade7fadab889f32c096dac691a1e3b8018c3e39a" +dependencies = [ + "aes", + "ahash", + 
"base64", + "byteorder", + "cbc", + "cfb", + "chrono", + "encoding_rs", + "fancy-regex", + "getrandom 0.2.17", + "hmac", + "html_parser", + "imagesize", + "lazy_static", + "md-5", + "quick-xml 0.37.5", + "regex", + "sha2", + "thin-vec", + "thousands", + "zip", +] + +[[package]] name = "unicode-ident" version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2632,7 +2968,7 @@ dependencies = [ [[package]] name = "xlcat" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "assert_cmd", @@ -2642,6 +2978,7 @@ dependencies = [ "predicates", "rust_xlsxwriter", "tempfile", + "umya-spreadsheet", ] [[package]] @@ -2683,7 +3020,7 @@ dependencies = [ "flate2", "indexmap", "memchr", - "thiserror", + "thiserror 2.0.18", "zopfli", ] diff --git a/Cargo.toml b/Cargo.toml @@ -1,13 +1,26 @@ [package] name = "xlcat" -version = "0.1.0" +version = "0.2.0" edition = "2024" +[lib] +name = "xlcat" +path = "src/lib.rs" + +[[bin]] +name = "xlcat" +path = "src/bin/xlcat.rs" + +[[bin]] +name = "xlset" +path = "src/bin/xlset.rs" + [dependencies] calamine = "0.26" polars = { version = "0.46", features = ["dtype-date", "dtype-datetime", "dtype-duration", "csv"] } clap = { version = "4", features = ["derive"] } anyhow = "1" +umya-spreadsheet = "2" [profile.release] strip = true diff --git a/src/bin/xlcat.rs b/src/bin/xlcat.rs @@ -0,0 +1,362 @@ +use xlcat::formatter; +use xlcat::metadata; +use xlcat::reader; + +use anyhow::Result; +use clap::Parser; +use polars::prelude::*; +use std::path::PathBuf; +use std::process; + +use xlcat::metadata::{FileInfo, SheetInfo}; + +#[derive(Parser, Debug)] +#[command(name = "xlcat", about = "View Excel files in the terminal")] +struct Cli { + /// Path to .xls or .xlsx file + file: PathBuf, + + /// Show only column names and types + #[arg(long)] + schema: bool, + + /// Show summary statistics + #[arg(long)] + describe: bool, + + /// Show first N rows + #[arg(long)] + head: Option<usize>, + + /// Show last N rows + 
#[arg(long)] + tail: Option<usize>, + + /// Show all rows (overrides large-file gate) + #[arg(long)] + all: bool, + + /// Select sheet by name or 0-based index + #[arg(long)] + sheet: Option<String>, + + /// Large-file threshold (default: 1M). Accepts: 500K, 1M, 10M, 1G + #[arg(long, default_value = "1M", value_parser = parse_size)] + max_size: u64, + + /// Output as CSV instead of markdown + #[arg(long)] + csv: bool, +} + +fn parse_size(s: &str) -> Result<u64, String> { + let s = s.trim(); + let (num_part, multiplier) = if s.ends_with('G') || s.ends_with('g') { + (&s[..s.len() - 1], 1_073_741_824u64) + } else if s.ends_with("GB") || s.ends_with("gb") { + (&s[..s.len() - 2], 1_073_741_824u64) + } else if s.ends_with('M') || s.ends_with('m') { + (&s[..s.len() - 1], 1_048_576u64) + } else if s.ends_with("MB") || s.ends_with("mb") { + (&s[..s.len() - 2], 1_048_576u64) + } else if s.ends_with('K') || s.ends_with('k') { + (&s[..s.len() - 1], 1_024u64) + } else if s.ends_with("KB") || s.ends_with("kb") { + (&s[..s.len() - 2], 1_024u64) + } else { + (s, 1u64) + }; + let num: f64 = num_part.parse().map_err(|_| format!("Invalid size: {s}"))?; + Ok((num * multiplier as f64) as u64) +} + +// --------------------------------------------------------------------------- +// ArgError — used for user-facing flag/argument errors (exit code 2) +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct ArgError(String); + +impl std::fmt::Display for ArgError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::error::Error for ArgError {} + +// --------------------------------------------------------------------------- +// Sheet resolution +// --------------------------------------------------------------------------- + +enum SheetTarget { + Single(usize), + ListAll, +} + +// --------------------------------------------------------------------------- +// run() — main 
orchestration +// --------------------------------------------------------------------------- + +fn run(cli: &Cli) -> Result<()> { + // 1. Validate flag combinations + if cli.schema && cli.describe { + return Err(ArgError("--schema and --describe are mutually exclusive".into()).into()); + } + if (cli.schema || cli.describe) + && (cli.head.is_some() || cli.tail.is_some() || cli.all) + { + return Err(ArgError( + "--schema/--describe cannot be combined with --head, --tail, or --all".into(), + ) + .into()); + } + if (cli.schema || cli.describe) && cli.csv { + return Err(ArgError( + "--csv cannot be combined with --schema or --describe".into(), + ) + .into()); + } + + // 2. Read file metadata + let info = metadata::read_file_info(&cli.file)?; + let file_name = cli + .file + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| cli.file.display().to_string()); + + // 3. Resolve sheet target + let target = resolve_sheet_target(cli, &info)?; + + match target { + SheetTarget::Single(idx) => { + let sheet = &info.sheets[idx]; + let df = reader::read_sheet(&cli.file, &sheet.name)?; + render_single_sheet(cli, &file_name, &info, sheet, &df)?; + } + SheetTarget::ListAll => { + if cli.describe { + // --describe on multi-sheet: iterate all sheets + let mut out = formatter::format_header(&file_name, &info); + out.push('\n'); + for sheet in &info.sheets { + let df = reader::read_sheet(&cli.file, &sheet.name)?; + if sheet.rows == 0 && sheet.cols == 0 { + out.push_str(&formatter::format_empty_sheet(sheet)); + } else { + out.push_str(&formatter::format_schema(sheet, &df)); + out.push_str(&formatter::format_describe(&df)); + } + out.push('\n'); + } + print!("{out}"); + } else { + // Default multi-sheet: list schemas + let mut pairs: Vec<(&SheetInfo, DataFrame)> = Vec::new(); + for sheet in &info.sheets { + let df = reader::read_sheet(&cli.file, &sheet.name)?; + pairs.push((sheet, df)); + } + let out = formatter::format_sheet_listing(&file_name, &info, &pairs); + 
print!("{out}"); + } + } + } + + Ok(()) +} + +fn resolve_sheet_target(cli: &Cli, info: &FileInfo) -> Result<SheetTarget> { + if let Some(ref sheet_arg) = cli.sheet { + // Try name match first + if let Some(idx) = info.sheets.iter().position(|s| s.name == *sheet_arg) { + return Ok(SheetTarget::Single(idx)); + } + // Try 0-based index + if let Ok(idx) = sheet_arg.parse::<usize>() { + if idx < info.sheets.len() { + return Ok(SheetTarget::Single(idx)); + } + return Err(ArgError(format!( + "Sheet index {idx} out of range (file has {} sheets)", + info.sheets.len() + )) + .into()); + } + return Err(ArgError(format!("Sheet not found: {sheet_arg}")).into()); + } + + if info.sheets.len() == 1 { + return Ok(SheetTarget::Single(0)); + } + + // Multi-sheet, no --sheet specified + let has_row_flags = cli.all || cli.head.is_some() || cli.tail.is_some() || cli.csv; + if has_row_flags { + return Err(ArgError( + "Multiple sheets found. Use --sheet <name> to select one before using --all, --head, --tail, or --csv.".into(), + ) + .into()); + } + + Ok(SheetTarget::ListAll) +} + +fn render_single_sheet( + cli: &Cli, + file_name: &str, + info: &FileInfo, + sheet: &SheetInfo, + df: &DataFrame, +) -> Result<()> { + // CSV mode: apply row selection, output CSV, done + if cli.csv { + let selected = apply_row_selection(cli, info, df); + let csv_out = formatter::format_csv(&selected); + print!("{csv_out}"); + return Ok(()); + } + + let mut out = formatter::format_header(file_name, info); + out.push('\n'); + + // Completely empty sheet (0 rows, 0 cols) + if sheet.rows == 0 && sheet.cols == 0 { + out.push_str(&formatter::format_empty_sheet(sheet)); + print!("{out}"); + return Ok(()); + } + + // Header-only sheet (has columns but 0 data rows) + if df.height() == 0 { + out.push_str(&formatter::format_schema(sheet, df)); + out.push_str("\n(no data rows)\n"); + print!("{out}"); + return Ok(()); + } + + if cli.schema { + out.push_str(&formatter::format_schema(sheet, df)); + } else if cli.describe { + 
out.push_str(&formatter::format_schema(sheet, df)); + out.push_str(&formatter::format_describe(df)); + } else { + // Data mode + out.push_str(&formatter::format_schema(sheet, df)); + out.push('\n'); + out.push_str(&format_data_with_selection(cli, info, df)); + } + + print!("{out}"); + Ok(()) +} + +/// Format data output with row selection logic. +fn format_data_with_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> String { + let total = df.height(); + + // --all: show everything + if cli.all { + return formatter::format_data_table(df); + } + + // Explicit --head and/or --tail + if cli.head.is_some() || cli.tail.is_some() { + let head_n = cli.head.unwrap_or(0); + let tail_n = cli.tail.unwrap_or(0); + if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { + return formatter::format_data_table(df); + } + // If only --head, show first N + if cli.tail.is_none() { + let head_df = df.head(Some(head_n)); + return formatter::format_data_table(&head_df); + } + // If only --tail, show last N + if cli.head.is_none() { + let tail_df = df.tail(Some(tail_n)); + return formatter::format_data_table(&tail_df); + } + // Both specified + return formatter::format_head_tail(df, head_n, tail_n); + } + + // Large file gate: file_size > max_size and no explicit flags + if info.file_size > cli.max_size { + let mut out = formatter::format_head_tail(df, 25, 0); + out.push_str(&format!( + "\nLarge file ({}) — showing first 25 of {total} rows. Use --all to see everything.\n", + metadata::format_file_size(info.file_size) + )); + return out; + } + + // Adaptive default: <=50 rows show all, >50 show head 25 + tail 25 + if total <= 50 { + formatter::format_data_table(df) + } else { + formatter::format_head_tail(df, 25, 25) + } +} + +/// Apply row selection for CSV mode — returns a (possibly sliced) DataFrame. 
+fn apply_row_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> DataFrame { + let total = df.height(); + + if cli.all { + return df.clone(); + } + + if cli.head.is_some() || cli.tail.is_some() { + let head_n = cli.head.unwrap_or(0); + let tail_n = cli.tail.unwrap_or(0); + + if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { + return df.clone(); + } + + if cli.tail.is_none() { + return df.head(Some(head_n)); + } + if cli.head.is_none() { + return df.tail(Some(tail_n)); + } + + // Both head and tail: combine + let head_df = df.head(Some(head_n)); + let tail_df = df.tail(Some(tail_n)); + return head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()); + } + + // Large file gate + if info.file_size > cli.max_size { + return df.head(Some(25)); + } + + // Adaptive default + if total <= 50 { + df.clone() + } else { + let head_df = df.head(Some(25)); + let tail_df = df.tail(Some(25)); + head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()) + } +} + +// --------------------------------------------------------------------------- +// main() +// --------------------------------------------------------------------------- + +fn main() { + let cli = Cli::parse(); + if let Err(err) = run(&cli) { + // Check if the root cause is an ArgError + if err.downcast_ref::<ArgError>().is_some() { + eprintln!("xlcat: {err}"); + process::exit(2); + } + eprintln!("xlcat: {err}"); + process::exit(1); + } +} diff --git a/src/bin/xlset.rs b/src/bin/xlset.rs @@ -0,0 +1,4 @@ +fn main() { + eprintln!("xlset: not yet implemented"); + std::process::exit(1); +} diff --git a/src/cell.rs b/src/cell.rs @@ -0,0 +1 @@ +// Cell address parsing and value type inference (implemented later) diff --git a/src/lib.rs b/src/lib.rs @@ -0,0 +1,5 @@ +pub mod cell; +pub mod formatter; +pub mod metadata; +pub mod reader; +pub mod writer; diff --git a/src/main.rs b/src/main.rs @@ -1,362 +0,0 @@ -mod formatter; -mod metadata; -mod reader; - -use anyhow::Result; -use clap::Parser; -use polars::prelude::*; 
-use std::path::PathBuf; -use std::process; - -use metadata::{FileInfo, SheetInfo}; - -#[derive(Parser, Debug)] -#[command(name = "xlcat", about = "View Excel files in the terminal")] -struct Cli { - /// Path to .xls or .xlsx file - file: PathBuf, - - /// Show only column names and types - #[arg(long)] - schema: bool, - - /// Show summary statistics - #[arg(long)] - describe: bool, - - /// Show first N rows - #[arg(long)] - head: Option<usize>, - - /// Show last N rows - #[arg(long)] - tail: Option<usize>, - - /// Show all rows (overrides large-file gate) - #[arg(long)] - all: bool, - - /// Select sheet by name or 0-based index - #[arg(long)] - sheet: Option<String>, - - /// Large-file threshold (default: 1M). Accepts: 500K, 1M, 10M, 1G - #[arg(long, default_value = "1M", value_parser = parse_size)] - max_size: u64, - - /// Output as CSV instead of markdown - #[arg(long)] - csv: bool, -} - -fn parse_size(s: &str) -> Result<u64, String> { - let s = s.trim(); - let (num_part, multiplier) = if s.ends_with('G') || s.ends_with('g') { - (&s[..s.len() - 1], 1_073_741_824u64) - } else if s.ends_with("GB") || s.ends_with("gb") { - (&s[..s.len() - 2], 1_073_741_824u64) - } else if s.ends_with('M') || s.ends_with('m') { - (&s[..s.len() - 1], 1_048_576u64) - } else if s.ends_with("MB") || s.ends_with("mb") { - (&s[..s.len() - 2], 1_048_576u64) - } else if s.ends_with('K') || s.ends_with('k') { - (&s[..s.len() - 1], 1_024u64) - } else if s.ends_with("KB") || s.ends_with("kb") { - (&s[..s.len() - 2], 1_024u64) - } else { - (s, 1u64) - }; - let num: f64 = num_part.parse().map_err(|_| format!("Invalid size: {s}"))?; - Ok((num * multiplier as f64) as u64) -} - -// --------------------------------------------------------------------------- -// ArgError — used for user-facing flag/argument errors (exit code 2) -// --------------------------------------------------------------------------- - -#[derive(Debug)] -struct ArgError(String); - -impl std::fmt::Display for ArgError { - fn 
fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl std::error::Error for ArgError {} - -// --------------------------------------------------------------------------- -// Sheet resolution -// --------------------------------------------------------------------------- - -enum SheetTarget { - Single(usize), - ListAll, -} - -// --------------------------------------------------------------------------- -// run() — main orchestration -// --------------------------------------------------------------------------- - -fn run(cli: &Cli) -> Result<()> { - // 1. Validate flag combinations - if cli.schema && cli.describe { - return Err(ArgError("--schema and --describe are mutually exclusive".into()).into()); - } - if (cli.schema || cli.describe) - && (cli.head.is_some() || cli.tail.is_some() || cli.all) - { - return Err(ArgError( - "--schema/--describe cannot be combined with --head, --tail, or --all".into(), - ) - .into()); - } - if (cli.schema || cli.describe) && cli.csv { - return Err(ArgError( - "--csv cannot be combined with --schema or --describe".into(), - ) - .into()); - } - - // 2. Read file metadata - let info = metadata::read_file_info(&cli.file)?; - let file_name = cli - .file - .file_name() - .map(|s| s.to_string_lossy().to_string()) - .unwrap_or_else(|| cli.file.display().to_string()); - - // 3. 
Resolve sheet target - let target = resolve_sheet_target(cli, &info)?; - - match target { - SheetTarget::Single(idx) => { - let sheet = &info.sheets[idx]; - let df = reader::read_sheet(&cli.file, &sheet.name)?; - render_single_sheet(cli, &file_name, &info, sheet, &df)?; - } - SheetTarget::ListAll => { - if cli.describe { - // --describe on multi-sheet: iterate all sheets - let mut out = formatter::format_header(&file_name, &info); - out.push('\n'); - for sheet in &info.sheets { - let df = reader::read_sheet(&cli.file, &sheet.name)?; - if sheet.rows == 0 && sheet.cols == 0 { - out.push_str(&formatter::format_empty_sheet(sheet)); - } else { - out.push_str(&formatter::format_schema(sheet, &df)); - out.push_str(&formatter::format_describe(&df)); - } - out.push('\n'); - } - print!("{out}"); - } else { - // Default multi-sheet: list schemas - let mut pairs: Vec<(&SheetInfo, DataFrame)> = Vec::new(); - for sheet in &info.sheets { - let df = reader::read_sheet(&cli.file, &sheet.name)?; - pairs.push((sheet, df)); - } - let out = formatter::format_sheet_listing(&file_name, &info, &pairs); - print!("{out}"); - } - } - } - - Ok(()) -} - -fn resolve_sheet_target(cli: &Cli, info: &FileInfo) -> Result<SheetTarget> { - if let Some(ref sheet_arg) = cli.sheet { - // Try name match first - if let Some(idx) = info.sheets.iter().position(|s| s.name == *sheet_arg) { - return Ok(SheetTarget::Single(idx)); - } - // Try 0-based index - if let Ok(idx) = sheet_arg.parse::<usize>() { - if idx < info.sheets.len() { - return Ok(SheetTarget::Single(idx)); - } - return Err(ArgError(format!( - "Sheet index {idx} out of range (file has {} sheets)", - info.sheets.len() - )) - .into()); - } - return Err(ArgError(format!("Sheet not found: {sheet_arg}")).into()); - } - - if info.sheets.len() == 1 { - return Ok(SheetTarget::Single(0)); - } - - // Multi-sheet, no --sheet specified - let has_row_flags = cli.all || cli.head.is_some() || cli.tail.is_some() || cli.csv; - if has_row_flags { - return 
Err(ArgError( - "Multiple sheets found. Use --sheet <name> to select one before using --all, --head, --tail, or --csv.".into(), - ) - .into()); - } - - Ok(SheetTarget::ListAll) -} - -fn render_single_sheet( - cli: &Cli, - file_name: &str, - info: &FileInfo, - sheet: &SheetInfo, - df: &DataFrame, -) -> Result<()> { - // CSV mode: apply row selection, output CSV, done - if cli.csv { - let selected = apply_row_selection(cli, info, df); - let csv_out = formatter::format_csv(&selected); - print!("{csv_out}"); - return Ok(()); - } - - let mut out = formatter::format_header(file_name, info); - out.push('\n'); - - // Completely empty sheet (0 rows, 0 cols) - if sheet.rows == 0 && sheet.cols == 0 { - out.push_str(&formatter::format_empty_sheet(sheet)); - print!("{out}"); - return Ok(()); - } - - // Header-only sheet (has columns but 0 data rows) - if df.height() == 0 { - out.push_str(&formatter::format_schema(sheet, df)); - out.push_str("\n(no data rows)\n"); - print!("{out}"); - return Ok(()); - } - - if cli.schema { - out.push_str(&formatter::format_schema(sheet, df)); - } else if cli.describe { - out.push_str(&formatter::format_schema(sheet, df)); - out.push_str(&formatter::format_describe(df)); - } else { - // Data mode - out.push_str(&formatter::format_schema(sheet, df)); - out.push('\n'); - out.push_str(&format_data_with_selection(cli, info, df)); - } - - print!("{out}"); - Ok(()) -} - -/// Format data output with row selection logic. 
-fn format_data_with_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> String { - let total = df.height(); - - // --all: show everything - if cli.all { - return formatter::format_data_table(df); - } - - // Explicit --head and/or --tail - if cli.head.is_some() || cli.tail.is_some() { - let head_n = cli.head.unwrap_or(0); - let tail_n = cli.tail.unwrap_or(0); - if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { - return formatter::format_data_table(df); - } - // If only --head, show first N - if cli.tail.is_none() { - let head_df = df.head(Some(head_n)); - return formatter::format_data_table(&head_df); - } - // If only --tail, show last N - if cli.head.is_none() { - let tail_df = df.tail(Some(tail_n)); - return formatter::format_data_table(&tail_df); - } - // Both specified - return formatter::format_head_tail(df, head_n, tail_n); - } - - // Large file gate: file_size > max_size and no explicit flags - if info.file_size > cli.max_size { - let mut out = formatter::format_head_tail(df, 25, 0); - out.push_str(&format!( - "\nLarge file ({}) — showing first 25 of {total} rows. Use --all to see everything.\n", - metadata::format_file_size(info.file_size) - )); - return out; - } - - // Adaptive default: <=50 rows show all, >50 show head 25 + tail 25 - if total <= 50 { - formatter::format_data_table(df) - } else { - formatter::format_head_tail(df, 25, 25) - } -} - -/// Apply row selection for CSV mode — returns a (possibly sliced) DataFrame. 
-fn apply_row_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> DataFrame { - let total = df.height(); - - if cli.all { - return df.clone(); - } - - if cli.head.is_some() || cli.tail.is_some() { - let head_n = cli.head.unwrap_or(0); - let tail_n = cli.tail.unwrap_or(0); - - if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { - return df.clone(); - } - - if cli.tail.is_none() { - return df.head(Some(head_n)); - } - if cli.head.is_none() { - return df.tail(Some(tail_n)); - } - - // Both head and tail: combine - let head_df = df.head(Some(head_n)); - let tail_df = df.tail(Some(tail_n)); - return head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()); - } - - // Large file gate - if info.file_size > cli.max_size { - return df.head(Some(25)); - } - - // Adaptive default - if total <= 50 { - df.clone() - } else { - let head_df = df.head(Some(25)); - let tail_df = df.tail(Some(25)); - head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()) - } -} - -// --------------------------------------------------------------------------- -// main() -// --------------------------------------------------------------------------- - -fn main() { - let cli = Cli::parse(); - if let Err(err) = run(&cli) { - // Check if the root cause is an ArgError - if err.downcast_ref::<ArgError>().is_some() { - eprintln!("xlcat: {err}"); - process::exit(2); - } - eprintln!("xlcat: {err}"); - process::exit(1); - } -} diff --git a/src/writer.rs b/src/writer.rs @@ -0,0 +1 @@ +// umya-spreadsheet write logic (implemented later)