filter.rs (23352B)
1 use anyhow::Result; 2 use polars::prelude::*; 3 4 #[derive(Debug, Clone, PartialEq)] 5 pub enum FilterOp { 6 Eq, 7 NotEq, 8 Gt, 9 Lt, 10 Gte, 11 Lte, 12 Contains, 13 NotContains, 14 } 15 16 #[derive(Debug, Clone)] 17 pub struct FilterExpr { 18 pub column: String, 19 pub op: FilterOp, 20 pub value: String, 21 } 22 23 #[derive(Debug, Clone)] 24 pub struct SortSpec { 25 pub column: String, 26 pub descending: bool, 27 } 28 29 /// Parse a filter expression like "State=CA", "Amount>1000", "Name~john". 30 /// Scans left-to-right for the first operator character (= ! > < ~), 31 /// then determines the full operator. 32 pub fn parse_filter_expr(s: &str) -> Result<FilterExpr, String> { 33 let op_chars = ['=', '!', '>', '<', '~']; 34 let pos = s 35 .find(|c: char| op_chars.contains(&c)) 36 .ok_or_else(|| { 37 format!( 38 "no operator found in '{}'. Use =, !=, >, <, >=, <=, ~ or !~", 39 s 40 ) 41 })?; 42 if pos == 0 { 43 return Err(format!("missing column name in '{}'", s)); 44 } 45 let column = s[..pos].to_string(); 46 let rest = &s[pos..]; 47 let (op, op_len) = if rest.starts_with(">=") { 48 (FilterOp::Gte, 2) 49 } else if rest.starts_with("<=") { 50 (FilterOp::Lte, 2) 51 } else if rest.starts_with("!=") { 52 (FilterOp::NotEq, 2) 53 } else if rest.starts_with("!~") { 54 (FilterOp::NotContains, 2) 55 } else if rest.starts_with('>') { 56 (FilterOp::Gt, 1) 57 } else if rest.starts_with('<') { 58 (FilterOp::Lt, 1) 59 } else if rest.starts_with('=') { 60 (FilterOp::Eq, 1) 61 } else if rest.starts_with('~') { 62 (FilterOp::Contains, 1) 63 } else { 64 return Err(format!("invalid operator in '{}'", s)); 65 }; 66 let value = rest[op_len..].to_string(); 67 Ok(FilterExpr { column, op, value }) 68 } 69 70 /// Parse a sort spec like "Amount:desc" or "Name" (default asc). 71 /// Splits on the last colon so column names containing colons are supported. 72 pub fn parse_sort_spec(s: &str) -> Result<SortSpec, String> { 73 if let Some(colon_pos) = s.rfind(':') { 74 let col = &s[..colon_pos]; 75 let dir = &s[colon_pos + 1..]; 76 match dir.to_lowercase().as_str() { 77 "asc" => Ok(SortSpec { 78 column: col.to_string(), 79 descending: false, 80 }), 81 "desc" => Ok(SortSpec { 82 column: col.to_string(), 83 descending: true, 84 }), 85 _ => Err(format!( 86 "invalid sort direction '{}'. Use 'asc' or 'desc'", 87 dir 88 )), 89 } 90 } else { 91 Ok(SortSpec { 92 column: s.to_string(), 93 descending: false, 94 }) 95 } 96 } 97 98 /// Convert a column letter like "A", "B", "AA" to a 0-based index. 99 /// Returns None if the string isn't purely alphabetic or is empty. 100 fn col_letter_to_index(s: &str) -> Option<usize> { 101 if s.is_empty() || !s.chars().all(|c| c.is_ascii_alphabetic()) { 102 return None; 103 } 104 let mut idx: usize = 0; 105 for c in s.to_uppercase().chars() { 106 idx = idx * 26 + (c as usize - 'A' as usize + 1); 107 } 108 Some(idx - 1) 109 } 110 111 /// Resolve a column specifier to a DataFrame column name. 112 /// Accepts either: 113 /// - A header name (exact match first, then case-insensitive) 114 /// - A column letter like "A", "B", "AA" (mapped by position) 115 /// Header name match takes priority over column letter interpretation. 116 pub fn resolve_column(spec: &str, df_columns: &[String]) -> Result<String, String> { 117 // 1. Exact header name match 118 if df_columns.contains(&spec.to_string()) { 119 return Ok(spec.to_string()); 120 } 121 // 2. Case-insensitive header name match 122 let spec_lower = spec.to_lowercase(); 123 for col in df_columns { 124 if col.to_lowercase() == spec_lower { 125 return Ok(col.clone()); 126 } 127 } 128 // 3. Column letter (A=0, B=1, ...) — only if purely alphabetic 129 if let Some(idx) = col_letter_to_index(spec) { 130 if idx < df_columns.len() { 131 return Ok(df_columns[idx].clone()); 132 } 133 } 134 let available = df_columns.join(", "); 135 Err(format!("column '{}' not found. Available columns: {}", spec, available)) 136 } 137 138 /// Resolve a list of column specifiers to DataFrame column names. 139 pub fn resolve_columns(specs: &[String], df_columns: &[String]) -> Result<Vec<String>, String> { 140 specs.iter().map(|s| resolve_column(s, df_columns)).collect() 141 } 142 143 /// Check if a polars DataType is numeric. 144 fn is_numeric_dtype(dtype: &DataType) -> bool { 145 matches!( 146 dtype, 147 DataType::Int8 148 | DataType::Int16 149 | DataType::Int32 150 | DataType::Int64 151 | DataType::UInt8 152 | DataType::UInt16 153 | DataType::UInt32 154 | DataType::UInt64 155 | DataType::Float32 156 | DataType::Float64 157 ) 158 } 159 160 /// Build a boolean mask for a single filter expression against a DataFrame. 161 fn build_filter_mask(df: &DataFrame, expr: &FilterExpr) -> Result<BooleanChunked> { 162 let col = df.column(&expr.column).map_err(|e| anyhow::anyhow!("{}", e))?; 163 let series = col.as_materialized_series(); 164 let dtype = series.dtype(); 165 166 match &expr.op { 167 FilterOp::Eq => { 168 if is_numeric_dtype(dtype) { 169 if let Ok(n) = expr.value.parse::<f64>() { 170 let s = series.cast(&DataType::Float64)?; 171 return Ok(s.f64()?.equal(n)); 172 } 173 } 174 let s = series.cast(&DataType::String)?; 175 Ok(s.str()?.equal(expr.value.as_str())) 176 } 177 FilterOp::NotEq => { 178 if is_numeric_dtype(dtype) { 179 if let Ok(n) = expr.value.parse::<f64>() { 180 let s = series.cast(&DataType::Float64)?; 181 return Ok(s.f64()?.not_equal(n)); 182 } 183 } 184 let s = series.cast(&DataType::String)?; 185 Ok(s.str()?.not_equal(expr.value.as_str())) 186 } 187 FilterOp::Gt => { 188 let n = parse_numeric_value(&expr.value, ">")?; 189 let s = series.cast(&DataType::Float64)?; 190 Ok(s.f64()?.gt(n)) 191 } 192 FilterOp::Lt => { 193 let n = parse_numeric_value(&expr.value, "<")?; 194 let s = series.cast(&DataType::Float64)?; 195 Ok(s.f64()?.lt(n)) 196 } 197 FilterOp::Gte => { 198 let n = parse_numeric_value(&expr.value, ">=")?; 199 let s = series.cast(&DataType::Float64)?; 200 Ok(s.f64()?.gt_eq(n)) 201 } 202 FilterOp::Lte => { 203 let n = parse_numeric_value(&expr.value, "<=")?; 204 let s = series.cast(&DataType::Float64)?; 205 Ok(s.f64()?.lt_eq(n)) 206 } 207 FilterOp::Contains => { 208 let s = series.cast(&DataType::String)?; 209 let ca = s.str()?; 210 let pat = expr.value.to_lowercase(); 211 let mask: BooleanChunked = ca.into_iter() 212 .map(|opt_s| opt_s.map(|s| s.to_lowercase().contains(&pat)).unwrap_or(false)) 213 .collect(); 214 Ok(mask) 215 } 216 FilterOp::NotContains => { 217 let s = series.cast(&DataType::String)?; 218 let ca = s.str()?; 219 let pat = expr.value.to_lowercase(); 220 let mask: BooleanChunked = ca.into_iter() 221 .map(|opt_s| opt_s.map(|s| !s.to_lowercase().contains(&pat)).unwrap_or(true)) 222 .collect(); 223 Ok(mask) 224 } 225 } 226 } 227 228 fn parse_numeric_value(value: &str, op: &str) -> Result<f64> { 229 value 230 .parse::<f64>() 231 .map_err(|_| anyhow::anyhow!("'{}' requires numeric value, got '{}'", op, value)) 232 } 233 234 /// Apply a list of filter expressions to a DataFrame (AND logic). 235 /// An empty list returns the DataFrame unchanged. 236 pub fn apply_filters(df: &DataFrame, exprs: &[FilterExpr]) -> Result<DataFrame> { 237 let mut result = df.clone(); 238 for expr in exprs { 239 let mask = build_filter_mask(&result, expr)?; 240 result = result.filter(&mask)?; 241 } 242 Ok(result) 243 } 244 245 /// Options for the filter pipeline. 246 pub struct FilterOptions { 247 pub filters: Vec<FilterExpr>, 248 pub cols: Option<Vec<String>>, 249 pub sort: Option<SortSpec>, 250 pub limit: Option<usize>, 251 pub head: Option<usize>, 252 pub tail: Option<usize>, 253 } 254 255 /// Apply a sort specification to a DataFrame. 256 pub fn apply_sort(df: &DataFrame, spec: &SortSpec) -> Result<DataFrame> { 257 let opts = SortMultipleOptions::default() 258 .with_order_descending(spec.descending); 259 Ok(df.sort([&spec.column], opts)?) 260 } 261 262 /// Run the full filter pipeline: head/tail → resolve & filter → sort → limit → select columns. 263 pub fn filter_pipeline(df: DataFrame, opts: &FilterOptions) -> Result<DataFrame> { 264 let df_columns: Vec<String> = df 265 .get_column_names() 266 .iter() 267 .map(|s| s.to_string()) 268 .collect(); 269 270 // 1. Pre-filter window: head or tail 271 let df = if let Some(n) = opts.head { 272 df.head(Some(n)) 273 } else if let Some(n) = opts.tail { 274 df.tail(Some(n)) 275 } else { 276 df 277 }; 278 279 // 2. Resolve column names in filter expressions and apply filters 280 let resolved_filters: Vec<FilterExpr> = opts 281 .filters 282 .iter() 283 .map(|f| { 284 let resolved_col = resolve_column(&f.column, &df_columns)?; 285 Ok(FilterExpr { 286 column: resolved_col, 287 op: f.op.clone(), 288 value: f.value.clone(), 289 }) 290 }) 291 .collect::<Result<Vec<_>, String>>() 292 .map_err(|e| anyhow::anyhow!("{}", e))?; 293 294 let df = apply_filters(&df, &resolved_filters)?; 295 296 // 3. Sort 297 let df = if let Some(ref spec) = opts.sort { 298 let resolved_col = resolve_column(&spec.column, &df_columns) 299 .map_err(|e| anyhow::anyhow!("{}", e))?; 300 let resolved_spec = SortSpec { 301 column: resolved_col, 302 descending: spec.descending, 303 }; 304 apply_sort(&df, &resolved_spec)? 305 } else { 306 df 307 }; 308 309 // 4. Limit (after filtering and sorting) 310 let df = if let Some(n) = opts.limit { 311 df.head(Some(n)) 312 } else { 313 df 314 }; 315 316 // 5. Select columns 317 let df = if let Some(ref col_specs) = opts.cols { 318 let resolved_cols = resolve_columns(col_specs, &df_columns) 319 .map_err(|e| anyhow::anyhow!("{}", e))?; 320 let col_refs: Vec<&str> = resolved_cols.iter().map(|s| s.as_str()).collect(); 321 df.select(col_refs)? 322 } else { 323 df 324 }; 325 326 Ok(df) 327 } 328 329 #[cfg(test)] 330 mod tests { 331 use super::*; 332 333 fn make_test_df() -> DataFrame { 334 DataFrame::new(vec![ 335 Column::new("State".into(), &["CA", "NY", "CA", "TX", "NY"]), 336 Column::new("City".into(), &["LA", "NYC", "SF", "Houston", "Albany"]), 337 Column::new("Amount".into(), &[1500i64, 2000, 800, 1200, 500]), 338 Column::new("Year".into(), &[2023i64, 2023, 2024, 2024, 2023]), 339 Column::new("Status".into(), &["Active", "Active", "Draft", "Active", "Draft"]), 340 ]) 341 .unwrap() 342 } 343 344 #[test] 345 fn filter_eq_string() { 346 let df = make_test_df(); 347 let expr = parse_filter_expr("State=CA").unwrap(); 348 let result = apply_filters(&df, &[expr]).unwrap(); 349 assert_eq!(result.height(), 2); 350 } 351 352 #[test] 353 fn filter_eq_numeric() { 354 let df = make_test_df(); 355 let expr = parse_filter_expr("Amount=1500").unwrap(); 356 let result = apply_filters(&df, &[expr]).unwrap(); 357 assert_eq!(result.height(), 1); 358 } 359 360 #[test] 361 fn filter_not_eq() { 362 let df = make_test_df(); 363 let expr = parse_filter_expr("Status!=Draft").unwrap(); 364 let result = apply_filters(&df, &[expr]).unwrap(); 365 assert_eq!(result.height(), 3); 366 } 367 368 #[test] 369 fn filter_gt() { 370 let df = make_test_df(); 371 let expr = parse_filter_expr("Amount>1000").unwrap(); 372 let result = apply_filters(&df, &[expr]).unwrap(); 373 assert_eq!(result.height(), 3); 374 } 375 376 #[test] 377 fn filter_lt() { 378 let df = make_test_df(); 379 let expr = parse_filter_expr("Amount<1000").unwrap(); 380 let result = apply_filters(&df, &[expr]).unwrap(); 381 assert_eq!(result.height(), 2); 382 } 383 384 #[test] 385 fn filter_gte() { 386 let df = make_test_df(); 387 let expr = parse_filter_expr("Amount>=1500").unwrap(); 388 let result = apply_filters(&df, &[expr]).unwrap(); 389 assert_eq!(result.height(), 2); 390 } 391 392 #[test] 393 fn filter_lte() { 394 let df = make_test_df(); 395 let expr = parse_filter_expr("Amount<=800").unwrap(); 396 let result = apply_filters(&df, &[expr]).unwrap(); 397 assert_eq!(result.height(), 2); 398 } 399 400 #[test] 401 fn filter_contains() { 402 let df = make_test_df(); 403 let expr = parse_filter_expr("City~ou").unwrap(); 404 let result = apply_filters(&df, &[expr]).unwrap(); 405 assert_eq!(result.height(), 1); 406 } 407 408 #[test] 409 fn filter_contains_case_insensitive() { 410 let df = make_test_df(); 411 let expr = parse_filter_expr("City~HOUSTON").unwrap(); 412 let result = apply_filters(&df, &[expr]).unwrap(); 413 assert_eq!(result.height(), 1); 414 } 415 416 #[test] 417 fn filter_not_contains() { 418 let df = make_test_df(); 419 let expr = parse_filter_expr("Status!~raft").unwrap(); 420 let result = apply_filters(&df, &[expr]).unwrap(); 421 assert_eq!(result.height(), 3); 422 } 423 424 #[test] 425 fn filter_multiple_and() { 426 let df = make_test_df(); 427 let e1 = parse_filter_expr("State=CA").unwrap(); 428 let e2 = parse_filter_expr("Amount>1000").unwrap(); 429 let result = apply_filters(&df, &[e1, e2]).unwrap(); 430 assert_eq!(result.height(), 1); 431 } 432 433 #[test] 434 fn filter_no_matches_returns_empty() { 435 let df = make_test_df(); 436 let expr = parse_filter_expr("State=ZZ").unwrap(); 437 let result = apply_filters(&df, &[expr]).unwrap(); 438 assert_eq!(result.height(), 0); 439 } 440 441 #[test] 442 fn filter_empty_exprs_returns_all() { 443 let df = make_test_df(); 444 let result = apply_filters(&df, &[]).unwrap(); 445 assert_eq!(result.height(), 5); 446 } 447 448 #[test] 449 fn parse_eq() { 450 let expr = parse_filter_expr("State=CA").unwrap(); 451 assert_eq!(expr.column, "State"); 452 assert_eq!(expr.op, FilterOp::Eq); 453 assert_eq!(expr.value, "CA"); 454 } 455 456 #[test] 457 fn parse_not_eq() { 458 let expr = parse_filter_expr("Status!=Draft").unwrap(); 459 assert_eq!(expr.column, "Status"); 460 assert_eq!(expr.op, FilterOp::NotEq); 461 assert_eq!(expr.value, "Draft"); 462 } 463 464 #[test] 465 fn parse_gt() { 466 let expr = parse_filter_expr("Amount>1000").unwrap(); 467 assert_eq!(expr.column, "Amount"); 468 assert_eq!(expr.op, FilterOp::Gt); 469 assert_eq!(expr.value, "1000"); 470 } 471 472 #[test] 473 fn parse_lt() { 474 let expr = parse_filter_expr("Year<2024").unwrap(); 475 assert_eq!(expr.column, "Year"); 476 assert_eq!(expr.op, FilterOp::Lt); 477 assert_eq!(expr.value, "2024"); 478 } 479 480 #[test] 481 fn parse_gte() { 482 let expr = parse_filter_expr("Score>=90").unwrap(); 483 assert_eq!(expr.column, "Score"); 484 assert_eq!(expr.op, FilterOp::Gte); 485 assert_eq!(expr.value, "90"); 486 } 487 488 #[test] 489 fn parse_lte() { 490 let expr = parse_filter_expr("Price<=50.5").unwrap(); 491 assert_eq!(expr.column, "Price"); 492 assert_eq!(expr.op, FilterOp::Lte); 493 assert_eq!(expr.value, "50.5"); 494 } 495 496 #[test] 497 fn parse_contains() { 498 let expr = parse_filter_expr("Name~john").unwrap(); 499 assert_eq!(expr.column, "Name"); 500 assert_eq!(expr.op, FilterOp::Contains); 501 assert_eq!(expr.value, "john"); 502 } 503 504 #[test] 505 fn parse_not_contains() { 506 let expr = parse_filter_expr("Name!~draft").unwrap(); 507 assert_eq!(expr.column, "Name"); 508 assert_eq!(expr.op, FilterOp::NotContains); 509 assert_eq!(expr.value, "draft"); 510 } 511 512 #[test] 513 fn parse_value_with_equals() { 514 let expr = parse_filter_expr("Formula=A+B=C").unwrap(); 515 assert_eq!(expr.column, "Formula"); 516 assert_eq!(expr.op, FilterOp::Eq); 517 assert_eq!(expr.value, "A+B=C"); 518 } 519 520 #[test] 521 fn parse_empty_value() { 522 let expr = parse_filter_expr("Status=").unwrap(); 523 assert_eq!(expr.column, "Status"); 524 assert_eq!(expr.op, FilterOp::Eq); 525 assert_eq!(expr.value, ""); 526 } 527 528 #[test] 529 fn parse_no_operator_is_err() { 530 assert!(parse_filter_expr("JustAWord").is_err()); 531 } 532 533 #[test] 534 fn parse_no_column_is_err() { 535 assert!(parse_filter_expr("=value").is_err()); 536 } 537 538 #[test] 539 fn parse_sort_desc() { 540 let spec = parse_sort_spec("Amount:desc").unwrap(); 541 assert_eq!(spec.column, "Amount"); 542 assert!(spec.descending); 543 } 544 545 #[test] 546 fn parse_sort_asc() { 547 let spec = parse_sort_spec("Name:asc").unwrap(); 548 assert_eq!(spec.column, "Name"); 549 assert!(!spec.descending); 550 } 551 552 #[test] 553 fn parse_sort_default_asc() { 554 let spec = parse_sort_spec("Name").unwrap(); 555 assert_eq!(spec.column, "Name"); 556 assert!(!spec.descending); 557 } 558 559 #[test] 560 fn parse_sort_bad_dir_is_err() { 561 assert!(parse_sort_spec("Name:up").is_err()); 562 } 563 564 #[test] 565 fn resolve_by_header_name() { 566 let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()]; 567 assert_eq!(resolve_column("Amount", &cols).unwrap(), "Amount"); 568 } 569 570 #[test] 571 fn resolve_by_letter() { 572 let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()]; 573 assert_eq!(resolve_column("B", &cols).unwrap(), "Amount"); 574 } 575 576 #[test] 577 fn resolve_by_letter_lowercase() { 578 let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()]; 579 assert_eq!(resolve_column("b", &cols).unwrap(), "Amount"); 580 } 581 582 #[test] 583 fn resolve_header_takes_priority_over_letter() { 584 let cols = vec!["A".to_string(), "B".to_string()]; 585 assert_eq!(resolve_column("A", &cols).unwrap(), "A"); 586 } 587 588 #[test] 589 fn resolve_case_insensitive_header() { 590 let cols = vec!["State".to_string(), "Amount".to_string()]; 591 assert_eq!(resolve_column("state", &cols).unwrap(), "State"); 592 } 593 594 #[test] 595 fn resolve_unknown_column_is_err() { 596 let cols = vec!["State".to_string(), "Amount".to_string()]; 597 let err = resolve_column("Foo", &cols).unwrap_err(); 598 assert!(err.contains("not found"), "error was: {}", err); 599 } 600 601 #[test] 602 fn resolve_letter_out_of_range_is_err() { 603 let cols = vec!["State".to_string()]; 604 let err = resolve_column("C", &cols).unwrap_err(); 605 assert!(err.contains("not found"), "error was: {}", err); 606 } 607 608 #[test] 609 fn resolve_multiple_columns() { 610 let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()]; 611 let resolved = resolve_columns(&["A".to_string(), "Year".to_string()], &cols).unwrap(); 612 assert_eq!(resolved, vec!["State", "Year"]); 613 } 614 615 #[test] 616 fn sort_ascending() { 617 let df = make_test_df(); 618 let spec = parse_sort_spec("Amount:asc").unwrap(); 619 let result = apply_sort(&df, &spec).unwrap(); 620 let col = result.column("Amount").unwrap().as_materialized_series(); 621 let amounts = col.i64().unwrap(); 622 assert_eq!(amounts.get(0), Some(500)); 623 assert_eq!(amounts.get(4), Some(2000)); 624 } 625 626 #[test] 627 fn sort_descending() { 628 let df = make_test_df(); 629 let spec = parse_sort_spec("Amount:desc").unwrap(); 630 let result = apply_sort(&df, &spec).unwrap(); 631 let col = result.column("Amount").unwrap().as_materialized_series(); 632 let amounts = col.i64().unwrap(); 633 assert_eq!(amounts.get(0), Some(2000)); 634 assert_eq!(amounts.get(4), Some(500)); 635 } 636 637 #[test] 638 fn pipeline_full() { 639 let df = make_test_df(); 640 let opts = FilterOptions { 641 filters: vec![parse_filter_expr("Amount>500").unwrap()], 642 cols: Some(vec!["State".to_string(), "Amount".to_string()]), 643 sort: Some(parse_sort_spec("Amount:desc").unwrap()), 644 limit: Some(2), 645 head: None, 646 tail: None, 647 }; 648 let result = filter_pipeline(df, &opts).unwrap(); 649 assert_eq!(result.height(), 2); 650 assert_eq!(result.width(), 2); 651 let col = result.column("Amount").unwrap().as_materialized_series(); 652 let amounts = col.i64().unwrap(); 653 assert_eq!(amounts.get(0), Some(2000)); 654 assert_eq!(amounts.get(1), Some(1500)); 655 } 656 657 #[test] 658 fn pipeline_head_before_filter() { 659 let df = make_test_df(); // 5 rows: CA/LA, NY/NYC, CA/SF, TX/Houston, NY/Albany 660 let opts = FilterOptions { 661 filters: vec![parse_filter_expr("State=NY").unwrap()], 662 cols: None, 663 sort: None, 664 limit: None, 665 head: Some(3), // Take first 3 rows before filtering 666 tail: None, 667 }; 668 let result = filter_pipeline(df, &opts).unwrap(); 669 // First 3 rows: CA/LA, NY/NYC, CA/SF → only NY/NYC matches 670 assert_eq!(result.height(), 1); 671 } 672 673 #[test] 674 fn pipeline_tail_before_filter() { 675 let df = make_test_df(); // 5 rows 676 let opts = FilterOptions { 677 filters: vec![parse_filter_expr("State=CA").unwrap()], 678 cols: None, 679 sort: None, 680 limit: None, 681 head: None, 682 tail: Some(3), // Last 3 rows before filtering 683 }; 684 let result = filter_pipeline(df, &opts).unwrap(); 685 // Last 3 rows: CA/SF, TX/Houston, NY/Albany → only CA/SF matches 686 assert_eq!(result.height(), 1); 687 } 688 689 #[test] 690 fn pipeline_no_options_returns_all() { 691 let df = make_test_df(); 692 let opts = FilterOptions { 693 filters: vec![], 694 cols: None, 695 sort: None, 696 limit: None, 697 head: None, 698 tail: None, 699 }; 700 let result = filter_pipeline(df, &opts).unwrap(); 701 assert_eq!(result.height(), 5); 702 assert_eq!(result.width(), 5); 703 } 704 705 #[test] 706 fn pipeline_cols_by_letter() { 707 let df = make_test_df(); 708 let opts = FilterOptions { 709 filters: vec![], 710 cols: Some(vec!["A".to_string(), "C".to_string()]), 711 sort: None, 712 limit: None, 713 head: None, 714 tail: None, 715 }; 716 let result = filter_pipeline(df, &opts).unwrap(); 717 assert_eq!(result.width(), 2); 718 let names: Vec<String> = result.get_column_names().iter().map(|s| s.to_string()).collect(); 719 assert_eq!(names, vec!["State", "Amount"]); 720 } 721 722 #[test] 723 fn pipeline_limit_after_filter() { 724 let df = make_test_df(); 725 let opts = FilterOptions { 726 filters: vec![parse_filter_expr("Status=Active").unwrap()], 727 cols: None, 728 sort: None, 729 limit: Some(2), 730 head: None, 731 tail: None, 732 }; 733 let result = filter_pipeline(df, &opts).unwrap(); 734 assert_eq!(result.height(), 2); // 3 Active rows, limited to 2 735 } 736 }