xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

xlfilter.rs (5630B)


      1 use std::path::PathBuf;
      2 use std::process;
      3 
      4 use anyhow::Result;
      5 use clap::Parser;
      6 
      7 use xlcat::filter::{
      8     parse_filter_expr, parse_sort_spec, filter_pipeline, FilterOptions,
      9 };
     10 use xlcat::formatter;
     11 use xlcat::metadata;
     12 use xlcat::metadata::SheetInfo;
     13 use xlcat::reader;
     14 
     15 #[derive(Parser)]
     16 #[command(
     17     name = "xlfilter",
     18     about = "Filter and query Excel spreadsheet data",
     19     version
     20 )]
     21 struct Args {
     22     /// Path to .xls or .xlsx file
     23     file: PathBuf,
     24 
     25     /// Select columns by letter (A,B,D) or header name (State,Amount)
     26     #[arg(long)]
     27     cols: Option<String>,
     28 
     29     /// Filter rows. Multiple = AND. Operators: = != > < >= <= ~ (contains, case-insensitive) !~ (not contains)
     30     #[arg(long = "where")]
     31     filters: Vec<String>,
     32 
     33     /// Sort by column. Format: col:dir (dir = asc or desc, default asc)
     34     #[arg(long)]
     35     sort: Option<String>,
     36 
     37     /// Max rows in output (applied after filtering)
     38     #[arg(long)]
     39     limit: Option<usize>,
     40 
     41     /// First N rows (applied before filtering)
     42     #[arg(long)]
     43     head: Option<usize>,
     44 
     45     /// Last N rows (applied before filtering)
     46     #[arg(long)]
     47     tail: Option<usize>,
     48 
     49     /// Target sheet by name or 0-based index (default: first sheet)
     50     #[arg(long)]
     51     sheet: Option<String>,
     52 
     53     /// Output as CSV instead of markdown table
     54     #[arg(long)]
     55     csv: bool,
     56 
     57     /// Skip first N data rows (for metadata/title rows above the real header)
     58     #[arg(long)]
     59     skip: Option<usize>,
     60 }
     61 
     62 // ---------------------------------------------------------------------------
     63 // ArgError — used for user-facing flag/argument errors (exit code 2)
     64 // ---------------------------------------------------------------------------
     65 
     66 #[derive(Debug)]
     67 struct ArgError(String);
     68 
     69 impl std::fmt::Display for ArgError {
     70     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
     71         write!(f, "{}", self.0)
     72     }
     73 }
     74 
     75 impl std::error::Error for ArgError {}
     76 
     77 fn run(args: Args) -> Result<()> {
     78     // Validate file exists
     79     if !args.file.exists() {
     80         anyhow::bail!(ArgError(format!(
     81             "file not found: {}",
     82             args.file.display()
     83         )));
     84     }
     85 
     86     // Validate mutually exclusive flags
     87     if args.head.is_some() && args.tail.is_some() {
     88         anyhow::bail!(ArgError(
     89             "--head and --tail are mutually exclusive".to_string()
     90         ));
     91     }
     92 
     93     // Read file metadata to resolve sheet name
     94     let info = metadata::read_file_info(&args.file)?;
     95     let sheet_name = resolve_sheet(&info, args.sheet.as_deref())?;
     96 
     97     // Read sheet into DataFrame
     98     let df = if let Some(skip) = args.skip {
     99         reader::read_sheet_with_skip(&args.file, &sheet_name, skip)?
    100     } else {
    101         reader::read_sheet(&args.file, &sheet_name)?
    102     };
    103 
    104     if df.height() == 0 {
    105         eprintln!("0 rows");
    106         let sheet_info = info
    107             .sheets
    108             .iter()
    109             .find(|s| s.name == sheet_name)
    110             .cloned()
    111             .unwrap_or(SheetInfo {
    112                 name: sheet_name,
    113                 rows: 0,
    114                 cols: 0,
    115             });
    116         println!("{}", formatter::format_empty_sheet(&sheet_info));
    117         return Ok(());
    118     }
    119 
    120     // Parse filter expressions
    121     let filters: Vec<_> = args
    122         .filters
    123         .iter()
    124         .map(|s| parse_filter_expr(s))
    125         .collect::<Result<Vec<_>, _>>()
    126         .map_err(|e| anyhow::anyhow!(ArgError(e)))?;
    127 
    128     // Parse sort spec
    129     let sort = args
    130         .sort
    131         .as_deref()
    132         .map(parse_sort_spec)
    133         .transpose()
    134         .map_err(|e| anyhow::anyhow!(ArgError(e)))?;
    135 
    136     // Parse column selection
    137     let cols = args.cols.map(|s| {
    138         s.split(',')
    139             .map(|c| c.trim().to_string())
    140             .collect::<Vec<_>>()
    141     });
    142 
    143     // Run pipeline
    144     let opts = FilterOptions {
    145         filters,
    146         cols,
    147         sort,
    148         limit: args.limit,
    149         head: args.head,
    150         tail: args.tail,
    151     };
    152     let result = filter_pipeline(df, &opts)?;
    153 
    154     // Output row count to stderr
    155     eprintln!("{} rows", result.height());
    156 
    157     // Format output
    158     if result.height() == 0 {
    159         println!("{}", formatter::format_data_table(&result));
    160     } else if args.csv {
    161         print!("{}", formatter::format_csv(&result));
    162     } else {
    163         println!("{}", formatter::format_data_table(&result));
    164     }
    165 
    166     Ok(())
    167 }
    168 
    169 /// Resolve sheet name from --sheet flag or default to first sheet.
    170 fn resolve_sheet(info: &metadata::FileInfo, sheet_arg: Option<&str>) -> Result<String> {
    171     match sheet_arg {
    172         None => {
    173             info.sheets
    174                 .first()
    175                 .map(|s| s.name.clone())
    176                 .ok_or_else(|| anyhow::anyhow!("workbook has no sheets"))
    177         }
    178         Some(s) => {
    179             if let Some(sheet) = info.sheets.iter().find(|si| si.name == s) {
    180                 return Ok(sheet.name.clone());
    181             }
    182             if let Ok(idx) = s.parse::<usize>() {
    183                 if let Some(sheet) = info.sheets.get(idx) {
    184                     return Ok(sheet.name.clone());
    185                 }
    186             }
    187             let names: Vec<_> = info.sheets.iter().map(|s| s.name.as_str()).collect();
    188             anyhow::bail!(ArgError(format!(
    189                 "sheet '{}' not found. Available sheets: {}",
    190                 s,
    191                 names.join(", ")
    192             )))
    193         }
    194     }
    195 }
    196 
    197 fn main() {
    198     let args = Args::parse();
    199     if let Err(err) = run(args) {
    200         if err.downcast_ref::<ArgError>().is_some() {
    201             eprintln!("xlfilter: {err}");
    202             process::exit(2);
    203         }
    204         eprintln!("xlfilter: {err}");
    205         process::exit(1);
    206     }
    207 }