xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

commit 43feec94bec9cc5128481518b7120a4bda23fde4
parent 4cf2522577bdb1a5967e5cf2f424d2b1c4963f6a
Author: Erik Loualiche <[email protected]>
Date:   Fri, 13 Mar 2026 18:05:29 -0500

feat: add cell module — A1 parser and value type inference

Co-Authored-By: Claude Sonnet 4.6 <[email protected]>

Diffstat:
M src/cell.rs | 444 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 443 insertions(+), 1 deletion(-)

use std::fmt;

// ── Limits ────────────────────────────────────────────────────────────────────

/// Highest valid 0-based column index (column "XFD").
const MAX_COL_INDEX: u32 = 16_383;

/// Highest valid 1-based row number.
const MAX_ROW_NUMBER: u32 = 1_048_576;

// ── Data types ────────────────────────────────────────────────────────────────

/// A parsed Excel cell reference (e.g. "B10").
/// `col` and `row` are 0-based internally.
#[derive(Debug, PartialEq)]
pub struct CellRef {
    pub col: u32,
    pub row: u32,
    /// Canonical upper-case label, e.g. "B10"
    pub label: String,
}

/// A typed cell value.
#[derive(Debug, PartialEq)]
pub enum CellValue {
    String(String),
    Integer(i64),
    Float(f64),
    Bool(bool),
    Date { year: i32, month: u32, day: u32 },
    Empty,
}

/// A complete cell assignment: which cell gets which value.
#[derive(Debug, PartialEq)]
pub struct CellAssignment {
    pub cell: CellRef,
    pub value: CellValue,
}

// ── Display ───────────────────────────────────────────────────────────────────

impl fmt::Display for CellRef {
    /// Writes the canonical label (e.g. "B10").
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.label)
    }
}

// ── Column helpers ────────────────────────────────────────────────────────────

/// Convert a 0-based column index to its Excel column letters
/// (e.g. 0→"A", 25→"Z", 26→"AA").
fn col_to_letters(mut col: u32) -> String {
    // Letters are produced least-significant first and reversed at the end.
    let mut letters = Vec::new();
    loop {
        // `col % 26` is always < 26, so the narrowing cast cannot truncate.
        letters.push(char::from(b'A' + (col % 26) as u8));
        if col < 26 {
            break;
        }
        // Bijective base-26 has no zero digit, hence the `- 1` on each carry.
        col = col / 26 - 1;
    }
    letters.iter().rev().collect()
}

/// Parse the alphabetic column prefix of an A1-style reference.
///
/// Returns `(0-based column index, remaining string slice)`.
///
/// # Errors
///
/// Returns an error when `s` does not start with an ASCII letter, or when the
/// column lies beyond XFD. The bound is checked while accumulating, so an
/// arbitrarily long run of letters is rejected instead of overflowing `u32`.
fn parse_col_part(s: &str) -> Result<(u32, &str), String> {
    let alpha_len = s.bytes().take_while(|b| b.is_ascii_alphabetic()).count();
    if alpha_len == 0 {
        return Err(format!("no column letters found in '{}'", s));
    }

    // The prefix is pure ASCII, so `alpha_len` is always a char boundary.
    let (letters, rest) = s.split_at(alpha_len);
    let col_str = letters.to_ascii_uppercase();

    // Accumulate the 1-based index (Excel "bijective base-26").
    let mut col_1based: u32 = 0;
    for b in col_str.bytes() {
        col_1based = col_1based * 26 + u32::from(b - b'A') + 1;
        // Bail out as soon as the limit is passed; this keeps the running
        // value small enough that the next multiplication cannot overflow.
        if col_1based > MAX_COL_INDEX + 1 {
            return Err(format!("column '{}' exceeds maximum XFD", col_str));
        }
    }

    Ok((col_1based - 1, rest))
}

// ── Number / date helpers ─────────────────────────────────────────────────────

/// Parse `s` as an `i64` if possible, otherwise as a *finite* `f64`.
///
/// `f64::from_str` also accepts "nan", "inf" and "infinity" and yields
/// infinity for overflowing literals; none of those are storable as a
/// spreadsheet number, so they are rejected here.
fn parse_number(s: &str) -> Option<CellValue> {
    if let Ok(i) = s.parse::<i64>() {
        return Some(CellValue::Integer(i));
    }
    s.parse::<f64>()
        .ok()
        .filter(|f| f.is_finite())
        .map(CellValue::Float)
}

/// Number of days in `month` (1–12) of `year` in the Gregorian calendar.
fn days_in_month(year: i32, month: u32) -> u32 {
    let is_leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;
    match month {
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        _ => 31,
    }
}

/// Parse a strict `YYYY-MM-DD` date into [`CellValue::Date`].
///
/// Returns `None` unless the input is exactly three all-digit fields of
/// widths 4-2-2 that name a real calendar day (month 1–12, day within the
/// month's length, leap years honoured).
fn try_parse_date(s: &str) -> Option<CellValue> {
    let mut fields = s.split('-');
    let (y, m, d) = (fields.next()?, fields.next()?, fields.next()?);
    if fields.next().is_some() {
        return None;
    }
    // Lengths: 4-2-2
    if y.len() != 4 || m.len() != 2 || d.len() != 2 {
        return None;
    }
    // All must be ASCII digits (rules out signs that `parse` would accept)
    if ![y, m, d].iter().all(|p| p.bytes().all(|b| b.is_ascii_digit())) {
        return None;
    }
    let year: i32 = y.parse().ok()?;
    let month: u32 = m.parse().ok()?;
    let day: u32 = d.parse().ok()?;
    if !(1..=12).contains(&month) || day < 1 || day > days_in_month(year, month) {
        return None;
    }
    Some(CellValue::Date { year, month, day })
}

// ── Public parsing API ────────────────────────────────────────────────────────

/// Parse an A1-style cell reference string into a [`CellRef`].
///
/// - Surrounding whitespace is ignored.
/// - Column letters are case-insensitive.
/// - Row numbers are 1-based in the input, stored 0-based.
/// - Maximum column is XFD (index 16383); maximum row is 1 048 576.
///
/// # Errors
///
/// Returns a human-readable message when the reference is empty, has no
/// column letters, has no row number, has a row that is not a plain decimal
/// number, or is outside the sheet limits.
pub fn parse_cell_ref(s: &str) -> Result<CellRef, String> {
    let s = s.trim();
    if s.is_empty() {
        return Err("cell reference is empty".to_string());
    }

    let (col, rest) = parse_col_part(s)?;

    if rest.is_empty() {
        return Err(format!("no row number found in '{}'", s));
    }

    // `u32::from_str` tolerates a leading '+', which is not valid A1 syntax,
    // so insist on digits only before parsing.
    if !rest.bytes().all(|b| b.is_ascii_digit()) {
        return Err(format!("invalid row number '{}' in '{}'", rest, s));
    }
    let row_1based: u32 = rest
        .parse()
        .map_err(|_| format!("invalid row number '{}' in '{}'", rest, s))?;

    if row_1based == 0 {
        return Err(format!("row number must be >= 1, got 0 in '{}'", s));
    }
    if row_1based > MAX_ROW_NUMBER {
        return Err(format!(
            "row {} exceeds maximum {} in '{}'",
            row_1based, MAX_ROW_NUMBER, s
        ));
    }

    let row = row_1based - 1; // convert to 0-based
    let label = format!("{}{}", col_to_letters(col), row_1based);

    Ok(CellRef { col, row, label })
}

/// Infer a [`CellValue`] from a raw string, applying automatic type detection.
///
/// Detection order:
/// 1. Empty string → [`CellValue::Empty`]
/// 2. `"true"` / `"false"` (case-insensitive) → [`CellValue::Bool`]
/// 3. Valid `i64` → [`CellValue::Integer`]
/// 4. Valid finite `f64` → [`CellValue::Float`]
/// 5. `YYYY-MM-DD` naming a real calendar day → [`CellValue::Date`]
/// 6. Everything else → [`CellValue::String`]
///
/// Text such as `"nan"` or `"inf"` is kept as a string rather than turned
/// into a non-finite float.
pub fn infer_value(s: &str) -> CellValue {
    if s.is_empty() {
        return CellValue::Empty;
    }

    // Bool
    match s.to_ascii_lowercase().as_str() {
        "true" => return CellValue::Bool(true),
        "false" => return CellValue::Bool(false),
        _ => {}
    }

    // Integer, then float
    if let Some(number) = parse_number(s) {
        return number;
    }

    // Date: YYYY-MM-DD
    if let Some(date) = try_parse_date(s) {
        return date;
    }

    CellValue::String(s.to_string())
}

/// Force a value to a specific type based on a tag.
///
/// Supported tags: `str`, `num`, `bool`, `date`.
///
/// # Errors
///
/// Returns an error when the tag is unknown or `raw` cannot be represented
/// as the requested type (for `num` this includes non-finite values).
fn coerce_value(raw: &str, tag: &str) -> Result<CellValue, String> {
    match tag {
        "str" => Ok(CellValue::String(raw.to_string())),
        "num" => parse_number(raw).ok_or_else(|| format!("cannot coerce '{}' to num", raw)),
        "bool" => match raw.to_ascii_lowercase().as_str() {
            "true" | "1" | "yes" => Ok(CellValue::Bool(true)),
            "false" | "0" | "no" => Ok(CellValue::Bool(false)),
            _ => Err(format!("cannot coerce '{}' to bool", raw)),
        },
        "date" => try_parse_date(raw)
            .ok_or_else(|| format!("cannot coerce '{}' to date (expected YYYY-MM-DD)", raw)),
        other => Err(format!("unknown type tag ':{}'", other)),
    }
}

/// Parse an assignment string such as `"A1=42"` or `"B2:str=07401"`.
///
/// Format: `<cell_ref>[:<tag>]=<value>`
/// - `<tag>` is optional; if absent, the value type is inferred automatically.
/// - The split is on the **first** `=` only, so values may contain `=`.
///
/// # Errors
///
/// Returns an error when there is no `=`, the cell reference is invalid, the
/// tag is unknown, or the value cannot be coerced to the tagged type.
pub fn parse_assignment(s: &str) -> Result<CellAssignment, String> {
    let eq_pos = s
        .find('=')
        .ok_or_else(|| format!("no '=' found in assignment '{}'", s))?;

    let lhs = &s[..eq_pos];
    let raw_value = &s[eq_pos + 1..];

    // Check for optional :tag in LHS
    let (cell_str, tag_opt) = match lhs.rfind(':') {
        Some(colon_pos) => (&lhs[..colon_pos], Some(&lhs[colon_pos + 1..])),
        None => (lhs, None),
    };

    let cell = parse_cell_ref(cell_str)?;

    let value = match tag_opt {
        Some(tag) => coerce_value(raw_value, tag)?,
        None => infer_value(raw_value),
    };

    Ok(CellAssignment { cell, value })
}

// ── Tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    // ── parse_cell_ref ────────────────────────────────────────────────────────

    #[test]
    fn test_parse_a1() {
        let r = parse_cell_ref("A1").unwrap();
        assert_eq!(r.col, 0);
        assert_eq!(r.row, 0);
        assert_eq!(r.label, "A1");
    }

    #[test]
    fn test_parse_z1_col25() {
        let r = parse_cell_ref("Z1").unwrap();
        assert_eq!(r.col, 25);
        assert_eq!(r.row, 0);
    }

    #[test]
    fn test_parse_aa1_col26() {
        let r = parse_cell_ref("AA1").unwrap();
        assert_eq!(r.col, 26);
        assert_eq!(r.row, 0);
    }

    #[test]
    fn test_parse_case_insensitive() {
        let lower = parse_cell_ref("b5").unwrap();
        let upper = parse_cell_ref("B5").unwrap();
        assert_eq!(lower, upper);
        assert_eq!(lower.label, "B5");
    }

    #[test]
    fn test_parse_b10_row9() {
        let r = parse_cell_ref("B10").unwrap();
        assert_eq!(r.col, 1);
        assert_eq!(r.row, 9);
    }

    #[test]
    fn test_parse_invalid_no_row() {
        assert!(parse_cell_ref("A").is_err());
    }

    #[test]
    fn test_parse_invalid_no_col() {
        assert!(parse_cell_ref("123").is_err());
    }

    #[test]
    fn test_parse_invalid_empty() {
        assert!(parse_cell_ref("").is_err());
    }

    #[test]
    fn test_parse_invalid_row_zero() {
        assert!(parse_cell_ref("A0").is_err());
    }

    #[test]
    fn test_parse_invalid_row_too_large() {
        assert!(parse_cell_ref("A1048577").is_err());
    }

    #[test]
    fn test_parse_max_row() {
        let r = parse_cell_ref("A1048576").unwrap();
        assert_eq!(r.row, 1_048_575);
    }

    #[test]
    fn test_parse_max_col() {
        let r = parse_cell_ref("XFD1").unwrap();
        assert_eq!(r.col, 16_383);
        assert_eq!(r.label, "XFD1");
        assert!(parse_cell_ref("XFE1").is_err());
    }

    #[test]
    fn test_parse_overlong_column_is_error_not_overflow() {
        assert!(parse_cell_ref("AAAAAAAAAAAA1").is_err());
    }

    #[test]
    fn test_parse_invalid_signed_row() {
        assert!(parse_cell_ref("A+5").is_err());
        assert!(parse_cell_ref("A-5").is_err());
    }

    // ── infer_value ───────────────────────────────────────────────────────────

    #[test]
    fn test_infer_integer() {
        assert_eq!(infer_value("42"), CellValue::Integer(42));
    }

    #[test]
    fn test_infer_negative_integer() {
        assert_eq!(infer_value("-7"), CellValue::Integer(-7));
    }

    #[test]
    fn test_infer_float() {
        assert_eq!(infer_value("3.14"), CellValue::Float(3.14));
    }

    #[test]
    fn test_infer_non_finite_is_string() {
        assert_eq!(infer_value("nan"), CellValue::String("nan".to_string()));
        assert_eq!(infer_value("inf"), CellValue::String("inf".to_string()));
        assert_eq!(
            infer_value("Infinity"),
            CellValue::String("Infinity".to_string())
        );
    }

    #[test]
    fn test_infer_bool_true() {
        assert_eq!(infer_value("true"), CellValue::Bool(true));
        assert_eq!(infer_value("TRUE"), CellValue::Bool(true));
    }

    #[test]
    fn test_infer_bool_false() {
        assert_eq!(infer_value("false"), CellValue::Bool(false));
        assert_eq!(infer_value("False"), CellValue::Bool(false));
    }

    #[test]
    fn test_infer_date() {
        assert_eq!(
            infer_value("2024-03-15"),
            CellValue::Date { year: 2024, month: 3, day: 15 }
        );
    }

    #[test]
    fn test_infer_impossible_date_is_string() {
        assert_eq!(
            infer_value("2024-02-30"),
            CellValue::String("2024-02-30".to_string())
        );
        assert_eq!(
            infer_value("2024-02-29"),
            CellValue::Date { year: 2024, month: 2, day: 29 }
        );
    }

    #[test]
    fn test_infer_string() {
        assert_eq!(
            infer_value("hello world"),
            CellValue::String("hello world".to_string())
        );
    }

    #[test]
    fn test_infer_leading_zero_becomes_integer() {
        // Leading zeros parse fine as i64, so "07401" → Integer(7401).
        // Use the `:str` tag to keep such values as text.
        assert_eq!(infer_value("07401"), CellValue::Integer(7401));
    }

    #[test]
    fn test_infer_empty() {
        assert_eq!(infer_value(""), CellValue::Empty);
    }

    // ── parse_assignment ──────────────────────────────────────────────────────

    #[test]
    fn test_assignment_basic() {
        let a = parse_assignment("A1=42").unwrap();
        assert_eq!(a.cell, parse_cell_ref("A1").unwrap());
        assert_eq!(a.value, CellValue::Integer(42));
    }

    #[test]
    fn test_assignment_with_str_tag() {
        let a = parse_assignment("B2:str=07401").unwrap();
        assert_eq!(a.cell, parse_cell_ref("B2").unwrap());
        assert_eq!(a.value, CellValue::String("07401".to_string()));
    }

    #[test]
    fn test_assignment_no_equals_error() {
        assert!(parse_assignment("A1").is_err());
    }

    #[test]
    fn test_assignment_empty_value() {
        let a = parse_assignment("C3=").unwrap();
        assert_eq!(a.value, CellValue::Empty);
    }

    #[test]
    fn test_assignment_string_with_spaces() {
        let a = parse_assignment("D4=hello world").unwrap();
        assert_eq!(a.value, CellValue::String("hello world".to_string()));
    }

    #[test]
    fn test_assignment_value_contains_equals() {
        // Split on first '=' only — value may contain '='
        let a = parse_assignment("E5=a=b").unwrap();
        assert_eq!(a.value, CellValue::String("a=b".to_string()));
    }

    #[test]
    fn test_assignment_num_tag() {
        let a = parse_assignment("A1:num=3.14").unwrap();
        assert_eq!(a.value, CellValue::Float(3.14));
    }

    #[test]
    fn test_assignment_num_tag_rejects_non_finite() {
        assert!(parse_assignment("A1:num=NaN").is_err());
        assert!(parse_assignment("A1:num=inf").is_err());
    }

    #[test]
    fn test_assignment_bool_tag() {
        let a = parse_assignment("A1:bool=true").unwrap();
        assert_eq!(a.value, CellValue::Bool(true));
    }

    #[test]
    fn test_assignment_date_tag() {
        let a = parse_assignment("A1:date=2025-01-01").unwrap();
        assert_eq!(a.value, CellValue::Date { year: 2025, month: 1, day: 1 });
    }

    #[test]
    fn test_assignment_date_tag_rejects_impossible_day() {
        assert!(parse_assignment("A1:date=2023-02-29").is_err());
    }

    // ── Display ───────────────────────────────────────────────────────────────

    #[test]
    fn test_display() {
        let r = parse_cell_ref("C7").unwrap();
        assert_eq!(format!("{}", r), "C7");
    }
}