BazerData.jl

Data manipulation utilities for Julia
Log | Files | Refs | README | LICENSE

bench_timeshift.jl (4448B)


      1 #!/usr/bin/env julia
      2 # Benchmark: tlag/tlead performance comparison
      3 #
      4 # Compares three approaches:
      5 #   1. Old linear scan (Date arithmetic interleaved with comparisons)
      6 #   2. New: pre-compute Int64 targets, scan in pure Int64
      7 #   3. Dict-based O(1) lookup
      8 #
      9 # Run with: julia --project test/bench_timeshift.jl
     10 
     11 using BazerData
     12 using Dates
     13 using Random
     14 using Statistics
     15 
     16 # --- Old linear scan (Date objects in hot loop) ---
     # Reference ("old") lag implementation: one forward scan that performs the
     # `t_vec[i] - n` arithmetic inside the hot loop.
     #
     # Arguments:
     #   x     - values to shift; must have the same length as `t_vec`.
     #   t_vec - time stamps. The cursor `j` only ever moves forward, so the lag
     #           targets `t_vec[i] - n` are assumed to be non-decreasing (i.e. `t_vec`
     #           sorted ascending); this is not checked here.
     #   n     - lag; any value for which `t_vec[i] - n` is defined.
     #
     # Returns an array with element type `Union{Missing, eltype(x)}` whose i-th entry
     # is the value of `x` observed at time `t_vec[i] - n`, or `missing` when `t_vec`
     # contains no such time stamp.
     #
     # Throws `DimensionMismatch` when `x` and `t_vec` differ in length.
     function tlag_oldscan(x, t_vec, n)
         N = length(t_vec)
         # `x` is indexed under `@inbounds` below, so its length must be validated
         # up front; otherwise a short `x` would be read out of bounds.
         if length(x) != N
             throw(DimensionMismatch(
                 "x has length $(length(x)) but t_vec has length $N"))
         end
         x_shift = Array{Union{Missing, eltype(x)}}(missing, N)
         j = 0  # position of the last time stamp known to be <= the current target
         @inbounds for i in 1:N
             lagt = t_vec[i] - n
             # Advance the cursor up to the last time stamp not exceeding the target.
             while j < N && t_vec[j + 1] <= lagt
                 j += 1
             end
             # Only an exact hit counts; gaps in `t_vec` leave the slot `missing`.
             if j > 0 && t_vec[j] == lagt
                 x_shift[i] = x[j]
             end
         end
         return x_shift
     end
     32 
     # Reference ("old") lead implementation: one forward scan that performs the
     # `t_vec[i] + n` arithmetic inside the hot loop.
     #
     # Arguments:
     #   x     - values to shift; must have the same length as `t_vec`.
     #   t_vec - time stamps. The cursor `j` only ever moves forward and the loop
     #           compares against `t_vec[N]` as the maximum, so `t_vec` is assumed to
     #           be sorted ascending; this is not checked here.
     #   n     - lead; any value for which `t_vec[i] + n` is defined.
     #
     # Returns an array with element type `Union{Missing, eltype(x)}` whose i-th entry
     # is the value of `x` observed at time `t_vec[i] + n`, or `missing` when `t_vec`
     # contains no such time stamp.
     #
     # Throws `DimensionMismatch` when `x` and `t_vec` differ in length.
     function tlead_oldscan(x, t_vec, n)
         N = length(t_vec)
         # `x` is indexed under `@inbounds` below, so its length must be validated
         # up front; otherwise a short `x` would be read out of bounds.
         if length(x) != N
             throw(DimensionMismatch(
                 "x has length $(length(x)) but t_vec has length $N"))
         end
         x_shift = Array{Union{Missing, eltype(x)}}(missing, N)
         j = 0  # every time stamp at positions 1:j is strictly below the target
         @inbounds for i in 1:N
             leadt = t_vec[i] + n
             # Past the last time stamp nothing can match, now or for any later i.
             if leadt > t_vec[N]
                 break
             end
             # Skip every time stamp strictly below the target.
             while j < N && t_vec[j + 1] < leadt
                 j += 1
             end
             # Only an exact hit counts; gaps in `t_vec` leave the slot `missing`.
             if j + 1 <= N && t_vec[j + 1] == leadt
                 x_shift[i] = x[j + 1]
             end
         end
         return x_shift
     end
     49 
     50 # --- Dict-based lookup ---
     # Lag implementation backed by a hash table: build a time-stamp -> position map
     # once, then resolve every lag target with a single lookup.
     #
     # Arguments:
     #   x     - values to shift; must have the same length as `t_vec`.
     #   t_vec - time stamps. No ordering is required by this approach. When a time
     #           stamp occurs more than once, the last occurrence wins because later
     #           insertions overwrite earlier ones.
     #   n     - lag; any value for which `t_vec[i] - n` is defined.
     #
     # Returns an array with element type `Union{Missing, eltype(x)}` whose i-th entry
     # is the value of `x` stored at time `t_vec[i] - n`, or `missing` when `t_vec`
     # contains no such time stamp.
     #
     # Throws `DimensionMismatch` when `x` and `t_vec` differ in length.
     function tlag_dict(x, t_vec, n)
         N = length(t_vec)
         # `x` is indexed under `@inbounds` below, so its length must be validated
         # up front; otherwise a short `x` would be read out of bounds.
         if length(x) != N
             throw(DimensionMismatch(
                 "x has length $(length(x)) but t_vec has length $N"))
         end
         x_shift = Array{Union{Missing, eltype(x)}}(missing, N)
         lookup = Dict{eltype(t_vec), Int}()
         sizehint!(lookup, N)  # reserve capacity for N entries before filling
         @inbounds for i in 1:N
             lookup[t_vec[i]] = i
         end
         @inbounds for i in 1:N
             # 0 is never a valid position, so it serves as the "not found" marker.
             idx = get(lookup, t_vec[i] - n, 0)
             if idx > 0
                 x_shift[i] = x[idx]
             end
         end
         return x_shift
     end
     63 
     64 
     65 # --- Benchmark harness ---
     # Time the zero-argument callable `f`.
     #
     # `f` is first invoked `warmup` times without being measured, then a garbage
     # collection is forced, and finally `f` is invoked `trials` more times with each
     # call timed individually via `time_ns()`.
     #
     # Returns a named tuple `(median, min)` of the per-call wall times in
     # milliseconds.
     function bench(f; warmup=3, trials=15)
         # Unmeasured calls, so one-off costs are not attributed to the samples.
         foreach(_ -> f(), 1:warmup)
         GC.gc()

         samples_ms = Float64[]
         for _ in 1:trials
             tic = time_ns()
             f()
             toc = time_ns()
             push!(samples_ms, (toc - tic) / 1e6)  # ns -> ms
         end
         return (median=median(samples_ms), min=minimum(samples_ms))
     end
     77 
     # Print one result row to standard output.
     #
     # Arguments:
     #   label - row caption, right-padded to 28 characters.
     #   old   - timing of the baseline; only its `median` field is read.
     #   new   - timing of the candidate; only its `median` field is read.
     #   dict  - optional timing of the Dict-based variant; when given, a third
     #           column with its median and its speedup over `old` is appended.
     #
     # Speedups are `old.median` divided by the other median and are coloured with
     # ANSI escapes: green when >= 1.0, red otherwise. Returns `nothing`.
     function report(label, old, new; dict=nothing)
         green, red, ansi_reset = "\033[32m", "\033[31m", "\033[0m"
         tint(ratio) = ratio >= 1.0 ? green : red
         cell(ms) = rpad(round(ms, digits=2), 8)

         speedup = old.median / new.median
         row = string(
             "  ", rpad(label, 28),
             "  old=", cell(old.median), "ms  ",
             "new=", cell(new.median), "ms  ",
             tint(speedup), round(speedup, digits=2), "x", ansi_reset,
         )
         if !isnothing(dict)
             dict_speedup = old.median / dict.median
             row = string(
                 row,
                 "  dict=", cell(dict.median), "ms ",
                 tint(dict_speedup), round(dict_speedup, digits=2), "x", ansi_reset,
             )
         end
         println(row)
         return nothing
     end
     92 
     93 
     94 # --- Generate test data ---
     # Build `n` strictly increasing dates starting at 2000-01-01.
     #
     # The global RNG is re-seeded with `seed`, so repeated calls with the same
     # arguments return the same vector. Consecutive dates are one day apart, except
     # that with probability `gap_prob` the step is drawn uniformly from 2:5 days.
     #
     # Returns a `Vector{Date}` of length `n`.
     function make_daily_dates(n; gap_prob=0.1, seed=42)
         Random.seed!(seed)
         dates = Vector{Date}(undef, n)
         current = Date(2000, 1, 1)
         for k in eachindex(dates)
             dates[k] = current
             # The second draw happens only when a gap is taken; this keeps the
             # sequence of random numbers consumed unchanged.
             step_days = if rand() < gap_prob
                 rand(2:5)
             else
                 1
             end
             current += Day(step_days)
         end
         return dates
     end
    105 
    # Build `n` strictly increasing integer time stamps starting at 1.
    #
    # The global RNG is re-seeded with `seed`, so repeated calls with the same
    # arguments return the same vector. Consecutive values differ by 1, except that
    # with probability `gap_prob` the step is drawn uniformly from 2:5.
    #
    # Returns a `Vector{Int}` of length `n`.
    function make_integers(n; gap_prob=0.1, seed=42)
        Random.seed!(seed)
        stamps = Vector{Int}(undef, n)
        current = 1
        for k in eachindex(stamps)
            stamps[k] = current
            # The second draw happens only when a gap is taken; this keeps the
            # sequence of random numbers consumed unchanged.
            step = if rand() < gap_prob
                rand(2:5)
            else
                1
            end
            current += step
        end
        return stamps
    end
    116 
    117 
    118 # --- Run benchmarks ---
    # Print the report header: a legend for the column names, framed by two rules.
    for banner_line in (
        "\n" * "="^80,
        "  TimeShift Benchmark",
        "  old = linear scan on Date objects",
        "  new = pre-compute Int64 targets, scan in Int64",
        "  dict = Dict{T,Int} lookup",
        "="^80,
    )
        println(banner_line)
    end
    125 
    # Run every configuration at two problem sizes. For each size the time stamps
    # come from the seeded generators above and the values are random floats (paired
    # with the Date series) or random integers in 1:1000 (paired with the Int series).
    for N in [100_000, 1_000_000]
        # `div` is the ASCII spelling of integer division; it prints "100K"/"1000K".
        println("\n--- N = $(div(N, 1000))K elements ---")

        dates = make_daily_dates(N)
        ints  = make_integers(N)
        x_f   = rand(N)
        x_i   = rand(1:1000, N)

        # tlag: old scan vs. package `tlag` vs. Dict lookup.
        println("\n  tlag:")
        for (lbl, t, x, n) in [
            ("Int,   n=1",        ints,  x_i, 1),
            ("Int,   n=365",      ints,  x_i, 365),
            ("Date,  n=Day(1)",   dates, x_f, Day(1)),
            ("Date,  n=Month(1)", dates, x_f, Month(1)),
            ("Date,  n=Year(1)",  dates, x_f, Year(1)),
        ]
            old  = bench(() -> tlag_oldscan(x, t, n))
            new  = bench(() -> tlag(x, t; n=n, checksorted=false))
            dict = bench(() -> tlag_dict(x, t, n))
            report(lbl, old, new; dict=dict)
        end

        # tlead: old scan vs. package `tlead` (no Dict variant is defined for lead).
        println("\n  tlead:")
        for (lbl, t, x, n) in [
            ("Int,   n=1",        ints,  x_i, 1),
            ("Date,  n=Day(1)",   dates, x_f, Day(1)),
            ("Date,  n=Month(1)", dates, x_f, Month(1)),
            ("Date,  n=Year(1)",  dates, x_f, Year(1)),
        ]
            old = bench(() -> tlead_oldscan(x, t, n))
            new = bench(() -> tlead(x, t; n=n, checksorted=false))
            report(lbl, old, new)
        end
    end

    println("\n" * "="^80)