xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

xlcat.rs (11232B)


      1 use xlcat::formatter;
      2 use xlcat::metadata;
      3 use xlcat::reader;
      4 
      5 use anyhow::Result;
      6 use clap::Parser;
      7 use polars::prelude::*;
      8 use std::path::PathBuf;
      9 use std::process;
     10 
     11 use xlcat::metadata::{FileInfo, SheetInfo};
     12 
// Command-line arguments for `xlcat`, parsed by clap's derive API.
//
// NOTE: the `///` comments on the fields below are picked up by clap as the
// `--help` text, so they are user-facing strings — edit them with that in mind.
// Flag-combination rules (e.g. --schema vs --describe) are not expressed here;
// they are validated at the start of `run()`.
#[derive(Parser, Debug)]
#[command(name = "xlcat", about = "View Excel files in the terminal")]
struct Cli {
    /// Path to .xls or .xlsx file
    // No #[arg] attribute: presumably treated by clap as a required positional.
    file: PathBuf,

    /// Show only column names and types
    #[arg(long)]
    schema: bool,

    /// Show summary statistics
    #[arg(long)]
    describe: bool,

    /// Show first N rows
    #[arg(long)]
    head: Option<usize>,

    /// Show last N rows
    #[arg(long)]
    tail: Option<usize>,

    /// Show all rows (overrides large-file gate)
    #[arg(long)]
    all: bool,

    /// Select sheet by name or 0-based index
    // Kept as a raw string; `resolve_sheet_target` tries it as a sheet name
    // first and only then as a numeric index.
    #[arg(long)]
    sheet: Option<String>,

    /// Large-file threshold (default: 1M). Accepts: 500K, 1M, 10M, 1G
    // Converted to a byte count by `parse_size`; compared against the file's
    // size when deciding whether to truncate the default output.
    #[arg(long, default_value = "1M", value_parser = parse_size)]
    max_size: u64,

    /// Output as CSV instead of markdown
    #[arg(long)]
    csv: bool,
}
     51 
     52 fn parse_size(s: &str) -> Result<u64, String> {
     53     let s = s.trim();
     54     let (num_part, multiplier) = if s.ends_with('G') || s.ends_with('g') {
     55         (&s[..s.len() - 1], 1_073_741_824u64)
     56     } else if s.ends_with("GB") || s.ends_with("gb") {
     57         (&s[..s.len() - 2], 1_073_741_824u64)
     58     } else if s.ends_with('M') || s.ends_with('m') {
     59         (&s[..s.len() - 1], 1_048_576u64)
     60     } else if s.ends_with("MB") || s.ends_with("mb") {
     61         (&s[..s.len() - 2], 1_048_576u64)
     62     } else if s.ends_with('K') || s.ends_with('k') {
     63         (&s[..s.len() - 1], 1_024u64)
     64     } else if s.ends_with("KB") || s.ends_with("kb") {
     65         (&s[..s.len() - 2], 1_024u64)
     66     } else {
     67         (s, 1u64)
     68     };
     69     let num: f64 = num_part.parse().map_err(|_| format!("Invalid size: {s}"))?;
     70     Ok((num * multiplier as f64) as u64)
     71 }
     72 
     73 // ---------------------------------------------------------------------------
     74 // ArgError — used for user-facing flag/argument errors (exit code 2)
     75 // ---------------------------------------------------------------------------
     76 
     77 #[derive(Debug)]
     78 struct ArgError(String);
     79 
     80 impl std::fmt::Display for ArgError {
     81     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
     82         write!(f, "{}", self.0)
     83     }
     84 }
     85 
     86 impl std::error::Error for ArgError {}
     87 
     88 // ---------------------------------------------------------------------------
     89 // Sheet resolution
     90 // ---------------------------------------------------------------------------
     91 
// What `run()` should render, as decided by `resolve_sheet_target`.
enum SheetTarget {
    // Render exactly one sheet; the payload is an index into `FileInfo::sheets`.
    Single(usize),
    // Workbook has several sheets and none was selected: list every sheet.
    ListAll,
}
     96 
     97 // ---------------------------------------------------------------------------
     98 // run() — main orchestration
     99 // ---------------------------------------------------------------------------
    100 
    101 fn run(cli: &Cli) -> Result<()> {
    102     // 1. Validate flag combinations
    103     if cli.schema && cli.describe {
    104         return Err(ArgError("--schema and --describe are mutually exclusive".into()).into());
    105     }
    106     if (cli.schema || cli.describe)
    107         && (cli.head.is_some() || cli.tail.is_some() || cli.all)
    108     {
    109         return Err(ArgError(
    110             "--schema/--describe cannot be combined with --head, --tail, or --all".into(),
    111         )
    112         .into());
    113     }
    114     if (cli.schema || cli.describe) && cli.csv {
    115         return Err(ArgError(
    116             "--csv cannot be combined with --schema or --describe".into(),
    117         )
    118         .into());
    119     }
    120 
    121     // 2. Read file metadata
    122     let info = metadata::read_file_info(&cli.file)?;
    123     let file_name = cli
    124         .file
    125         .file_name()
    126         .map(|s| s.to_string_lossy().to_string())
    127         .unwrap_or_else(|| cli.file.display().to_string());
    128 
    129     // 3. Resolve sheet target
    130     let target = resolve_sheet_target(cli, &info)?;
    131 
    132     match target {
    133         SheetTarget::Single(idx) => {
    134             let sheet = &info.sheets[idx];
    135             let df = reader::read_sheet(&cli.file, &sheet.name)?;
    136             render_single_sheet(cli, &file_name, &info, sheet, &df)?;
    137         }
    138         SheetTarget::ListAll => {
    139             if cli.describe {
    140                 // --describe on multi-sheet: iterate all sheets
    141                 let mut out = formatter::format_header(&file_name, &info);
    142                 out.push('\n');
    143                 for sheet in &info.sheets {
    144                     let df = reader::read_sheet(&cli.file, &sheet.name)?;
    145                     if sheet.rows == 0 && sheet.cols == 0 {
    146                         out.push_str(&formatter::format_empty_sheet(sheet));
    147                     } else {
    148                         out.push_str(&formatter::format_schema(sheet, &df));
    149                         out.push_str(&formatter::format_describe(&df));
    150                     }
    151                     out.push('\n');
    152                 }
    153                 print!("{out}");
    154             } else {
    155                 // Default multi-sheet: list schemas
    156                 let mut pairs: Vec<(&SheetInfo, DataFrame)> = Vec::new();
    157                 for sheet in &info.sheets {
    158                     let df = reader::read_sheet(&cli.file, &sheet.name)?;
    159                     pairs.push((sheet, df));
    160                 }
    161                 let out = formatter::format_sheet_listing(&file_name, &info, &pairs);
    162                 print!("{out}");
    163             }
    164         }
    165     }
    166 
    167     Ok(())
    168 }
    169 
    170 fn resolve_sheet_target(cli: &Cli, info: &FileInfo) -> Result<SheetTarget> {
    171     if let Some(ref sheet_arg) = cli.sheet {
    172         // Try name match first
    173         if let Some(idx) = info.sheets.iter().position(|s| s.name == *sheet_arg) {
    174             return Ok(SheetTarget::Single(idx));
    175         }
    176         // Try 0-based index
    177         if let Ok(idx) = sheet_arg.parse::<usize>() {
    178             if idx < info.sheets.len() {
    179                 return Ok(SheetTarget::Single(idx));
    180             }
    181             return Err(ArgError(format!(
    182                 "Sheet index {idx} out of range (file has {} sheets)",
    183                 info.sheets.len()
    184             ))
    185             .into());
    186         }
    187         return Err(ArgError(format!("Sheet not found: {sheet_arg}")).into());
    188     }
    189 
    190     if info.sheets.len() == 1 {
    191         return Ok(SheetTarget::Single(0));
    192     }
    193 
    194     // Multi-sheet, no --sheet specified
    195     let has_row_flags = cli.all || cli.head.is_some() || cli.tail.is_some() || cli.csv;
    196     if has_row_flags {
    197         return Err(ArgError(
    198             "Multiple sheets found. Use --sheet <name> to select one before using --all, --head, --tail, or --csv.".into(),
    199         )
    200         .into());
    201     }
    202 
    203     Ok(SheetTarget::ListAll)
    204 }
    205 
    206 fn render_single_sheet(
    207     cli: &Cli,
    208     file_name: &str,
    209     info: &FileInfo,
    210     sheet: &SheetInfo,
    211     df: &DataFrame,
    212 ) -> Result<()> {
    213     // CSV mode: apply row selection, output CSV, done
    214     if cli.csv {
    215         let selected = apply_row_selection(cli, info, df);
    216         let csv_out = formatter::format_csv(&selected);
    217         print!("{csv_out}");
    218         return Ok(());
    219     }
    220 
    221     let mut out = formatter::format_header(file_name, info);
    222     out.push('\n');
    223 
    224     // Completely empty sheet (0 rows, 0 cols)
    225     if sheet.rows == 0 && sheet.cols == 0 {
    226         out.push_str(&formatter::format_empty_sheet(sheet));
    227         print!("{out}");
    228         return Ok(());
    229     }
    230 
    231     // Header-only sheet (has columns but 0 data rows)
    232     if df.height() == 0 {
    233         out.push_str(&formatter::format_schema(sheet, df));
    234         out.push_str("\n(no data rows)\n");
    235         print!("{out}");
    236         return Ok(());
    237     }
    238 
    239     if cli.schema {
    240         out.push_str(&formatter::format_schema(sheet, df));
    241     } else if cli.describe {
    242         out.push_str(&formatter::format_schema(sheet, df));
    243         out.push_str(&formatter::format_describe(df));
    244     } else {
    245         // Data mode
    246         out.push_str(&formatter::format_schema(sheet, df));
    247         out.push('\n');
    248         out.push_str(&format_data_with_selection(cli, info, df));
    249     }
    250 
    251     print!("{out}");
    252     Ok(())
    253 }
    254 
    255 /// Format data output with row selection logic.
    256 fn format_data_with_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> String {
    257     let total = df.height();
    258 
    259     // --all: show everything
    260     if cli.all {
    261         return formatter::format_data_table(df);
    262     }
    263 
    264     // Explicit --head and/or --tail
    265     if cli.head.is_some() || cli.tail.is_some() {
    266         let head_n = cli.head.unwrap_or(0);
    267         let tail_n = cli.tail.unwrap_or(0);
    268         if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) {
    269             return formatter::format_data_table(df);
    270         }
    271         // If only --head, show first N
    272         if cli.tail.is_none() {
    273             let head_df = df.head(Some(head_n));
    274             return formatter::format_data_table(&head_df);
    275         }
    276         // If only --tail, show last N
    277         if cli.head.is_none() {
    278             let tail_df = df.tail(Some(tail_n));
    279             return formatter::format_data_table(&tail_df);
    280         }
    281         // Both specified
    282         return formatter::format_head_tail(df, head_n, tail_n);
    283     }
    284 
    285     // Large file gate: file_size > max_size and no explicit flags
    286     if info.file_size > cli.max_size {
    287         let mut out = formatter::format_head_tail(df, 25, 0);
    288         out.push_str(&format!(
    289             "\nLarge file ({}) — showing first 25 of {total} rows. Use --all to see everything.\n",
    290             metadata::format_file_size(info.file_size)
    291         ));
    292         return out;
    293     }
    294 
    295     // Adaptive default: <=50 rows show all, >50 show head 25 + tail 25
    296     if total <= 50 {
    297         formatter::format_data_table(df)
    298     } else {
    299         formatter::format_head_tail(df, 25, 25)
    300     }
    301 }
    302 
    303 /// Apply row selection for CSV mode — returns a (possibly sliced) DataFrame.
    304 fn apply_row_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> DataFrame {
    305     let total = df.height();
    306 
    307     if cli.all {
    308         return df.clone();
    309     }
    310 
    311     if cli.head.is_some() || cli.tail.is_some() {
    312         let head_n = cli.head.unwrap_or(0);
    313         let tail_n = cli.tail.unwrap_or(0);
    314 
    315         if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) {
    316             return df.clone();
    317         }
    318 
    319         if cli.tail.is_none() {
    320             return df.head(Some(head_n));
    321         }
    322         if cli.head.is_none() {
    323             return df.tail(Some(tail_n));
    324         }
    325 
    326         // Both head and tail: combine
    327         let head_df = df.head(Some(head_n));
    328         let tail_df = df.tail(Some(tail_n));
    329         return head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone());
    330     }
    331 
    332     // Large file gate
    333     if info.file_size > cli.max_size {
    334         return df.head(Some(25));
    335     }
    336 
    337     // Adaptive default
    338     if total <= 50 {
    339         df.clone()
    340     } else {
    341         let head_df = df.head(Some(25));
    342         let tail_df = df.tail(Some(25));
    343         head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone())
    344     }
    345 }
    346 
    347 // ---------------------------------------------------------------------------
    348 // main()
    349 // ---------------------------------------------------------------------------
    350 
    351 fn main() {
    352     let cli = Cli::parse();
    353     if let Err(err) = run(&cli) {
    354         // Check if the root cause is an ArgError
    355         if err.downcast_ref::<ArgError>().is_some() {
    356             eprintln!("xlcat: {err}");
    357             process::exit(2);
    358         }
    359         eprintln!("xlcat: {err}");
    360         process::exit(1);
    361     }
    362 }