wrds-download

TUI/CLI tool for browsing and downloading WRDS data
Log | Files | Refs | README

commit a2b36d1f63d137ed45e6baaff0023c432a8aad56
parent 6b65bccfd07d30b22a6c8ca105cfb5445021f4e5
Author: Erik Loualiche <[email protected]>
Date:   Fri, 20 Feb 2026 09:42:56 -0600

Replace DuckDB CGo export with pure Go pgx + parquet-go pipeline

Drop the go-duckdb CGo dependency that embedded the entire DuckDB C++
library (55MB binary). Export now streams rows directly from Postgres
via pgx and writes Parquet (parquet-go with ZSTD) or CSV (encoding/csv).
This eliminates CGo, halves the binary size (~28MB, 19MB stripped), and
enables CGO_ENABLED=0 cross-compilation.

Co-Authored-By: Claude Opus 4.6 <[email protected]>

Diffstat:
M cmd/download.go | 2 +-
M go.mod | 30 +++++++++++++++---------------
M go.sum | 57 +++++++++++++++++++++++++++++++++++++--------------------
M internal/db/client.go | 11 -----------
D internal/export/duckdb.go | 99 -------------------------------------------------------------------------------
A internal/export/export.go | 325 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M internal/tui/app.go | 2 +-
7 files changed, 379 insertions(+), 147 deletions(-)

diff --git a/cmd/download.go b/cmd/download.go @@ -83,7 +83,7 @@ func buildQuery() (string, error) { if dlColumns != "" && dlColumns != "*" { sel = dlColumns } - q := fmt.Sprintf("SELECT %s FROM wrds.%s.%s", sel, dlSchema, dlTable) + q := fmt.Sprintf("SELECT %s FROM %s.%s", sel, dlSchema, dlTable) if dlWhere != "" { q += " WHERE " + dlWhere diff --git a/go.mod b/go.mod @@ -3,13 +3,19 @@ module github.com/eloualiche/wrds-download go 1.25.0 require ( - github.com/apache/arrow-go/v18 v18.1.0 // indirect + github.com/charmbracelet/bubbles v1.0.0 + github.com/charmbracelet/bubbletea v1.3.10 + github.com/charmbracelet/lipgloss v1.1.0 + github.com/jackc/pgx/v5 v5.8.0 + github.com/parquet-go/parquet-go v0.27.0 + github.com/spf13/cobra v1.10.2 +) + +require ( + github.com/andybalholm/brotli v1.1.1 // indirect github.com/atotto/clipboard v0.1.4 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect - github.com/charmbracelet/bubbles v1.0.0 // indirect - github.com/charmbracelet/bubbletea v1.3.10 // indirect github.com/charmbracelet/colorprofile v0.4.1 // indirect - github.com/charmbracelet/lipgloss v1.1.0 // indirect github.com/charmbracelet/x/ansi v0.11.6 // indirect github.com/charmbracelet/x/cellbuf v0.0.15 // indirect github.com/charmbracelet/x/term v0.2.2 // indirect @@ -17,37 +23,31 @@ require ( github.com/clipperhouse/stringish v0.1.1 // indirect github.com/clipperhouse/uax29/v2 v2.5.0 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect - github.com/go-viper/mapstructure/v2 v2.2.1 // indirect - github.com/goccy/go-json v0.10.5 // indirect - github.com/google/flatbuffers v25.1.24+incompatible // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect - github.com/jackc/pgx/v5 v5.8.0 // indirect 
github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/klauspost/compress v1.17.11 // indirect - github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect - github.com/marcboeker/go-duckdb v1.8.5 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-localereader v0.0.1 // indirect github.com/mattn/go-runewidth v0.0.19 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/termenv v0.16.0 // indirect + github.com/parquet-go/bitpack v1.0.0 // indirect + github.com/parquet-go/jsonlite v1.0.0 // indirect github.com/pierrec/lz4/v4 v4.1.22 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/sahilm/fuzzy v0.1.1 // indirect - github.com/spf13/cobra v1.10.2 // indirect github.com/spf13/pflag v1.0.9 // indirect + github.com/twpayne/go-geom v1.6.1 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect - golang.org/x/mod v0.27.0 // indirect golang.org/x/sync v0.17.0 // indirect golang.org/x/sys v0.38.0 // indirect golang.org/x/text v0.29.0 // indirect - golang.org/x/tools v0.36.0 // indirect - golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + google.golang.org/protobuf v1.36.1 // indirect ) diff --git a/go.sum b/go.sum @@ -1,9 +1,17 @@ -github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y= -github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0= +github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= +github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= +github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZP/GkPY= +github.com/alecthomas/assert/v2 v2.10.0/go.mod 
h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY= +github.com/aymanbagabas/go-udiff v0.3.1/go.mod h1:G0fsKmG+P6ylD0r6N/KgQD/nWzgfnl8ZBcNLgcbrw8E= github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc= github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E= github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= @@ -16,6 +24,8 @@ github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ= github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI= github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q= +github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ= +github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U= github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk= github.com/charmbracelet/x/term v0.2.2/go.mod 
h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA= @@ -26,16 +36,16 @@ github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= -github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= -github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= -github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= -github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= -github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o= -github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/hexops/gotextdiff v1.0.3/go.mod 
h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -48,12 +58,10 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= -github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= -github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= -github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0= -github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= @@ -66,8 +74,15 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= github.com/muesli/termenv v0.16.0/go.mod 
h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= +github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA= +github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs= +github.com/parquet-go/jsonlite v1.0.0 h1:87QNdi56wOfsE5bdgas0vRzHPxfJgzrXGml1zZdd7VU= +github.com/parquet-go/jsonlite v1.0.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0= +github.com/parquet-go/parquet-go v0.27.0 h1:vHWK2xaHbj+v1DYps03yDRpEsdtOeKbhiXUaixoPb3g= +github.com/parquet-go/parquet-go v0.27.0/go.mod h1:navtkAYr2LGoJVp141oXPlO/sxLvaOe3la2JEoD8+rg= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= @@ -81,15 +96,17 @@ github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/twpayne/go-geom v1.6.1 h1:iLE+Opv0Ihm/ABIcvQFGIiFBXd76oBIar9drAwHFhR4= +github.com/twpayne/go-geom v1.6.1/go.mod h1:Kr+Nly6BswFsKM5sd31YaoWS5PeDDH2NftJTK7Gd028= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod 
h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= -github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= -github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= -golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= -golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -98,9 +115,9 @@ golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= -golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= +google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/db/client.go b/internal/db/client.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "os" - "strconv" "github.com/jackc/pgx/v5/pgxpool" ) @@ -41,16 +40,6 @@ func DSNFromEnv() (string, error) { return dsn, nil } -// PortFromEnv returns the port as an integer (for DuckDB attach). -func PortFromEnv() int { - p := getenv("PGPORT", "9737") - n, _ := strconv.Atoi(p) - if n == 0 { - n = 9737 - } - return n -} - func getenv(key, fallback string) string { if v := os.Getenv(key); v != "" { return v diff --git a/internal/export/duckdb.go b/internal/export/duckdb.go @@ -1,99 +0,0 @@ -package export - -import ( - "database/sql" - "fmt" - "os" - "strings" - - _ "github.com/marcboeker/go-duckdb" -) - -// Options controls the export behaviour. -type Options struct { - Format string // "parquet" or "csv" -} - -// Export runs query against the WRDS PostgreSQL instance and writes output to outPath. -// Format is determined by opts.Format (default: parquet). -func Export(query, outPath string, opts Options) error { - format := strings.ToLower(opts.Format) - if format == "" { - if strings.HasSuffix(strings.ToLower(outPath), ".csv") { - format = "csv" - } else { - format = "parquet" - } - } - - db, err := sql.Open("duckdb", "") - if err != nil { - return fmt.Errorf("open duckdb: %w", err) - } - defer db.Close() - - // Install and load postgres extension. - for _, stmt := range []string{ - "INSTALL postgres;", - "LOAD postgres;", - } { - if _, err := db.Exec(stmt); err != nil { - // Ignore "already installed" errors. 
- if !strings.Contains(err.Error(), "already") { - return fmt.Errorf("%s: %w", stmt, err) - } - } - } - - // Build the ATTACH string from env. - attachDSN := buildAttachDSN() - attach := fmt.Sprintf("ATTACH '%s' AS wrds (TYPE POSTGRES, READ_ONLY);", attachDSN) - if _, err := db.Exec(attach); err != nil { - return fmt.Errorf("attach: %w", err) - } - - // Wrap query in a COPY statement. - var copySQL string - switch format { - case "csv": - copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT CSV, HEADER true);", query, outPath) - default: - copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT PARQUET, COMPRESSION ZSTD);", query, outPath) - } - - if _, err := db.Exec(copySQL); err != nil { - return fmt.Errorf("copy: %w", err) - } - return nil -} - -// buildAttachDSN builds the postgres attach DSN string from standard PG env vars. -func buildAttachDSN() string { - host := getenv("PGHOST", "wrds-pgdata.wharton.upenn.edu") - port := getenv("PGPORT", "9737") - user := getenv("PGUSER", "") - password := getenv("PGPASSWORD", "") - dbname := getenv("PGDATABASE", user) - - // DuckDB postgres attach DSN format. - parts := []string{ - "host=" + host, - "port=" + port, - "dbname=" + dbname, - "sslmode=require", - } - if user != "" { - parts = append(parts, "user="+user) - } - if password != "" { - parts = append(parts, "password="+password) - } - return strings.Join(parts, " ") -} - -func getenv(key, fallback string) string { - if v := os.Getenv(key); v != "" { - return v - } - return fallback -} diff --git a/internal/export/export.go b/internal/export/export.go @@ -0,0 +1,325 @@ +package export + +import ( + "context" + "encoding/csv" + "fmt" + "math/big" + "os" + "strings" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgtype" + "github.com/parquet-go/parquet-go" + "github.com/parquet-go/parquet-go/compress/zstd" + + "github.com/eloualiche/wrds-download/internal/db" +) + +// Options controls the export behaviour. 
+type Options struct { + Format string // "parquet" or "csv" +} + +const rowGroupSize = 10_000 + +// Export runs query against the WRDS PostgreSQL instance and writes output to outPath. +// Format is determined by opts.Format (default: parquet). +func Export(query, outPath string, opts Options) error { + format := strings.ToLower(opts.Format) + if format == "" { + if strings.HasSuffix(strings.ToLower(outPath), ".csv") { + format = "csv" + } else { + format = "parquet" + } + } + + dsn, err := db.DSNFromEnv() + if err != nil { + return fmt.Errorf("dsn: %w", err) + } + + ctx := context.Background() + conn, err := pgx.Connect(ctx, dsn) + if err != nil { + return fmt.Errorf("connect: %w", err) + } + defer conn.Close(ctx) + + rows, err := conn.Query(ctx, query) + if err != nil { + return fmt.Errorf("query: %w", err) + } + defer rows.Close() + + switch format { + case "csv": + return writeCSV(rows, outPath) + default: + return writeParquet(rows, outPath) + } +} + +// writeCSV streams rows into a CSV file with a header row. +func writeCSV(rows pgx.Rows, outPath string) error { + f, err := os.Create(outPath) + if err != nil { + return fmt.Errorf("create csv: %w", err) + } + defer f.Close() + + w := csv.NewWriter(f) + defer w.Flush() + + fds := rows.FieldDescriptions() + header := make([]string, len(fds)) + for i, fd := range fds { + header[i] = fd.Name + } + if err := w.Write(header); err != nil { + return fmt.Errorf("write header: %w", err) + } + + record := make([]string, len(fds)) + for rows.Next() { + vals, err := rows.Values() + if err != nil { + return fmt.Errorf("scan row: %w", err) + } + for i, v := range vals { + record[i] = formatValue(v) + } + if err := w.Write(record); err != nil { + return fmt.Errorf("write row: %w", err) + } + } + if err := rows.Err(); err != nil { + return fmt.Errorf("rows: %w", err) + } + + w.Flush() + return w.Error() +} + +// writeParquet streams rows into a Parquet file using parquet-go. 
+func writeParquet(rows pgx.Rows, outPath string) error { + fds := rows.FieldDescriptions() + + schema, colTypes := buildParquetSchema(fds) + + f, err := os.Create(outPath) + if err != nil { + return fmt.Errorf("create parquet: %w", err) + } + defer f.Close() + + writer := parquet.NewGenericWriter[map[string]any](f, + schema, + parquet.Compression(&zstd.Codec{}), + ) + + buf := make([]map[string]any, 0, rowGroupSize) + + for rows.Next() { + vals, err := rows.Values() + if err != nil { + return fmt.Errorf("scan row: %w", err) + } + + row := make(map[string]any, len(fds)) + for i, v := range vals { + row[fds[i].Name] = convertValue(v, colTypes[i]) + } + buf = append(buf, row) + + if len(buf) >= rowGroupSize { + if _, err := writer.Write(buf); err != nil { + return fmt.Errorf("write row group: %w", err) + } + buf = buf[:0] + } + } + if err := rows.Err(); err != nil { + return fmt.Errorf("rows: %w", err) + } + + // Flush remaining rows. + if len(buf) > 0 { + if _, err := writer.Write(buf); err != nil { + return fmt.Errorf("write final rows: %w", err) + } + } + + return writer.Close() +} + +// colType tags how we convert PG values for Parquet. +type colType int + +const ( + colString colType = iota + colBool + colInt32 + colInt64 + colFloat32 + colFloat64 + colDate // days since epoch → int32 + colTimestamp // microseconds since epoch → int64 +) + +// buildParquetSchema maps PG field descriptors to a parquet schema. 
+func buildParquetSchema(fds []pgconn.FieldDescription) (*parquet.Schema, []colType) { + cols := make([]colType, len(fds)) + group := make(parquet.Group, len(fds)) + + for i, fd := range fds { + var node parquet.Node + + switch fd.DataTypeOID { + case 16: // bool + cols[i] = colBool + node = parquet.Optional(parquet.Leaf(parquet.BooleanType)) + case 21: // int2 + cols[i] = colInt32 + node = parquet.Optional(parquet.Leaf(parquet.Int32Type)) + case 23: // int4 + cols[i] = colInt32 + node = parquet.Optional(parquet.Leaf(parquet.Int32Type)) + case 20: // int8 + cols[i] = colInt64 + node = parquet.Optional(parquet.Leaf(parquet.Int64Type)) + case 700: // float4 + cols[i] = colFloat32 + node = parquet.Optional(parquet.Leaf(parquet.FloatType)) + case 701: // float8 + cols[i] = colFloat64 + node = parquet.Optional(parquet.Leaf(parquet.DoubleType)) + case 1082: // date + cols[i] = colDate + node = parquet.Optional(parquet.Date()) + case 1114, 1184: // timestamp, timestamptz + cols[i] = colTimestamp + node = parquet.Optional(parquet.Timestamp(parquet.Microsecond)) + default: + // text (25), varchar (1043), char (18, 1042), numeric (1700), etc. + cols[i] = colString + node = parquet.Optional(parquet.String()) + } + + group[fd.Name] = node + } + + return parquet.NewSchema("wrds", group), cols +} + +var epoch = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) + +// convertValue converts a pgx-scanned value to the appropriate Go type for parquet-go. 
+func convertValue(v any, ct colType) any { + if v == nil { + return nil + } + + switch ct { + case colBool: + if b, ok := v.(bool); ok { + return b + } + case colInt32: + switch n := v.(type) { + case int16: + return int32(n) + case int32: + return n + case int64: + return int32(n) + } + case colInt64: + switch n := v.(type) { + case int64: + return n + case int32: + return int64(n) + case int16: + return int64(n) + } + case colFloat32: + if f, ok := v.(float32); ok { + return f + } + if f, ok := v.(float64); ok { + return float32(f) + } + case colFloat64: + if f, ok := v.(float64); ok { + return f + } + if f, ok := v.(float32); ok { + return float64(f) + } + case colDate: + if t, ok := v.(time.Time); ok { + days := int32(t.Sub(epoch).Hours() / 24) + return days + } + case colTimestamp: + if t, ok := v.(time.Time); ok { + return t.Sub(epoch).Microseconds() + } + case colString: + return formatValue(v) + } + + // Fallback: stringify. + return formatValue(v) +} + +// formatValue converts any value to its string representation. +func formatValue(v any) string { + if v == nil { + return "" + } + switch val := v.(type) { + case string: + return val + case []byte: + return string(val) + case time.Time: + if val.Hour() == 0 && val.Minute() == 0 && val.Second() == 0 && val.Nanosecond() == 0 { + return val.Format("2006-01-02") + } + return val.Format(time.RFC3339) + case pgtype.Numeric: + if !val.Valid { + return "" + } + if val.NaN { + return "NaN" + } + if val.InfinityModifier == pgtype.Infinity { + return "Infinity" + } + if val.InfinityModifier == pgtype.NegativeInfinity { + return "-Infinity" + } + // Convert to big.Float for string representation. 
+ bi := val.Int + if bi == nil { + bi = new(big.Int) + } + bf := new(big.Float).SetInt(bi) + if val.Exp < 0 { + divisor := new(big.Float).SetInt(new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(-val.Exp)), nil)) + bf.Quo(bf, divisor) + } else if val.Exp > 0 { + multiplier := new(big.Float).SetInt(new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(val.Exp)), nil)) + bf.Mul(bf, multiplier) + } + return bf.Text('f', -1) + default: + return fmt.Sprintf("%v", val) + } +} diff --git a/internal/tui/app.go b/internal/tui/app.go @@ -233,7 +233,7 @@ func (a *App) startDownload(msg DlSubmitMsg) tea.Cmd { if msg.Columns != "" && msg.Columns != "*" { sel = msg.Columns } - query := fmt.Sprintf("SELECT %s FROM wrds.%s.%s", sel, msg.Schema, msg.Table) + query := fmt.Sprintf("SELECT %s FROM %s.%s", sel, msg.Schema, msg.Table) if msg.Where != "" { query += " WHERE " + msg.Where }