commit 745bc554fe070d08001219e7c24b052672fe7acd
parent 867be70d3424213b1250e942d91400be4017ad0a
Author: Erik Loualiche <[email protected]>
Date: Sun, 15 Feb 2026 22:53:26 -0600
deprecate JSONL functions in favor of JSON.jl v1
- Add Base.depwarn to read_jsonl, stream_jsonl, write_jsonl pointing
users to JSON.parse/JSON.json with jsonlines=true
- Remove @show T debug statement left in read_jsonl
- Fix write_jsonl crashing on files in current directory (dirname("") bug)
- Trim docstrings and add deprecation admonitions
- Update JSONL doc page with deprecation banner
Co-Authored-By: Claude Opus 4.6 <[email protected]>
Diffstat:
| M | docs/src/man/read_jsonl.md | | | 108 | ++++++++++++++----------------------------------------------------------------- |
| M | src/JSONLines.jl | | | 121 | ++++++++++++++++++++----------------------------------------------------------- |
2 files changed, 49 insertions(+), 180 deletions(-)
diff --git a/docs/src/man/read_jsonl.md b/docs/src/man/read_jsonl.md
@@ -1,5 +1,13 @@
# Working with JSON Lines Files
+!!! warning "Deprecated"
+ The JSONL functions in BazerUtils (`read_jsonl`, `stream_jsonl`, `write_jsonl`) are deprecated.
+ Use [JSON.jl](https://github.com/JuliaIO/JSON.jl) v1 instead, which has native support:
+ ```julia
+ using JSON
+ data = JSON.parsefile("data.jsonl"; jsonlines=true) # read
+ JSON.json("out.jsonl", data; jsonlines=true) # write
+ ```
---
@@ -7,21 +15,11 @@
> JSON Lines (JSONL) is a convenient format for storing structured data that may be processed one record at a time. Each line is a valid JSON value, separated by a newline character. This format is ideal for large datasets and streaming applications.
-- **UTF-8 Encoding:** Files must be UTF-8 encoded. Do not include a byte order mark (BOM).
-- **One JSON Value Per Line:** Each line is a valid JSON value (object, array, string, number, boolean, or null). Blank lines are ignored.
-- **Line Separator:** Each line ends with `\n` (or `\r\n`). The last line may or may not end with a newline.
-
-
For more details, see [jsonlines.org](https://jsonlines.org/).
-This is a personal implementation and is not tested for any sort of standard.
-It works fine for my usecase and I try to fix things as I encounter them, but ymmv.
-
---
-## Reading JSON Lines Files
-
-You can use the `read_jsonl` and `stream_jsonl` functions to read JSONL files or streams.
+## Legacy API (deprecated)
### `read_jsonl`
@@ -29,102 +27,34 @@ Reads the entire file or stream into memory and returns a vector of parsed JSON
```julia
using BazerUtils
-import JSON3
data = read_jsonl("data.jsonl")
-# or from an IOBuffer
-buf =
data = read_jsonl(IOBuffer("{\"a\": 1}\n{\"a\": 2}\n"))
data = read_jsonl(IOBuffer("{\"a\": 1}\n{\"a\": 2}\n"); dict_of_json=true)
```
-
-- **Arguments:** `source::Union{AbstractString, IO}`
-- **Returns:** `Vector` of parsed JSON values
-- **Note:** Loads all data into memory. For large files, use `stream_jsonl`.
-
----
-
-
### `stream_jsonl`
-Creates a lazy iterator (Channel) that yields one parsed JSON value at a time, without loading the entire file into memory.
-
-```julia
-stream = stream_jsonl(IOBuffer("{\"a\": 1}\n{\"a\": 2}\n"))
-data = collect(stream)
-BazerUtils._dict_of_json3.(data)
-
-stream = stream_jsonl(IOBuffer("{\"a\": 1}\n{\"a\": 2}\n[1,2,3]"))
-collect(stream) # error because types of vector elements are not all JSON3.Object{}
-stream = stream_jsonl(IOBuffer("{\"a\": 1}\n{\"a\": 2}\n[1,2,3]"), T=Any)
-collect(stream) # default to Vector{Any}
-
-stream = stream_jsonl(IOBuffer("[4,5,6]\n[1,2,3]"), T= JSON3.Array{})
-collect(stream)
-stream = stream_jsonl(IOBuffer("4\n1"), T=Int)
-collect(stream)
-```
+Creates a lazy iterator (Channel) that yields one parsed JSON value at a time.
-Allows iterators
```julia
-first10 = collect(Iterators.take(stream_jsonl("data.jsonl"), 10)) # Collect the first 10 records
-# see tests for other iterators ...
+for record in stream_jsonl("data.jsonl")
+ println(record)
+end
+first10 = collect(Iterators.take(stream_jsonl("data.jsonl"), 10))
```
+### `write_jsonl`
-- **Arguments:** `source::Union{AbstractString, IO}`
-- **Returns:** `Channel` (iterator) of parsed JSON values
-- **Note:** Ideal for large files and streaming workflows.
-
----
-
-## Writing JSON Lines Files
-
-Use `write_jsonl` to write an iterable of JSON-serializable values to a JSONL file.
+Write an iterable of JSON-serializable values to a JSONL file.
```julia
write_jsonl("out.jsonl", [Dict("a"=>1), Dict("b"=>2)])
write_jsonl("out.jsonl.gz", (Dict("i"=>i) for i in 1:100); compress=true)
```
-- **Arguments:**
- - `filename::AbstractString`
- - `data`: iterable of JSON-serializable values
- - `compress::Bool=false`: write gzip-compressed if true or filename ends with `.gz`
-- **Returns:** The filename
-
----
-
-
-## Example: Roundtrip with IOBuffer
-
-Note that there is no stable roundtrip between read and write, because of the way `JSON3` processes record into dictionaries and even when we add the dict flag it is `Symbol => Any`
-
-```julia
-data_string = [Dict("a"=>1), Dict("b"=>2)]
-data_symbol = [Dict(:a=>1), Dict(:b=>2)]
-
-function roundtrip(data)
- buf = IOBuffer()
- for obj in data
- JSON3.write(buf, obj)
- write(buf, '\n')
- end
- seekstart(buf)
- return read_jsonl(buf; dict_of_json=true)
-end
-
-roundtrip(data_string) == data_string
-roundtrip(data_symbol) == data_symbol
-```
-
---
## See Also
-- [`JSON3.jl`](https://github.com/quinnj/JSON3.jl): Fast, flexible JSON parsing and serialization for Julia.
-- [`CodecZlib.jl`](https://github.com/JuliaIO/CodecZlib.jl): Gzip compression support.
-
----
-
-For more advanced usage, see the function docstrings or the test suite.
\ No newline at end of file
+- [`JSON.jl`](https://github.com/JuliaIO/JSON.jl): The recommended replacement. Use `jsonlines=true` for JSONL support.
+- [`CodecZlib.jl`](https://github.com/JuliaIO/CodecZlib.jl): Gzip compression support.
\ No newline at end of file
diff --git a/src/JSONLines.jl b/src/JSONLines.jl
@@ -16,46 +16,23 @@
"""
read_jsonl(source::Union{AbstractString, IO}; dict_of_json::Bool=false) -> Vector
+!!! warning "Deprecated"
+ `read_jsonl` is deprecated. Use `JSON.parse(source; jsonlines=true)` from
+ [JSON.jl](https://github.com/JuliaIO/JSON.jl) v1 instead.
+
Read a JSON Lines (.jsonl) file or stream and return all records as a vector.
-This function reads the entire file or IO stream into memory at once, parsing each line as a separate
-JSON value. Empty lines are automatically skipped.
+Each line is parsed as a separate JSON value. Empty lines are skipped.
# Arguments
-- `source::Union{AbstractString, IO}`: Path to the JSON Lines file to read, or an IO stream (e.g., IOBuffer, file handle).
+- `source::Union{AbstractString, IO}`: Path to a JSONL file, or an IO stream.
- `dict_of_json::Bool=false`: If `true` and the parsed type is `JSON3.Object`, convert each record to a `Dict{Symbol,Any}`.
# Returns
-- `Vector`: A vector containing all parsed JSON values from the file or stream.
-
-# Examples
-```julia
-# Read all records from a JSONL file
-data = read_jsonl("data.jsonl")
-
-# Read from an IOBuffer
-buf = IOBuffer("$(JSON3.write(Dict(:a=>1)))\n$(JSON3.write(Dict(:a=>2)))\n")
-data = read_jsonl(buf)
-
-# Convert JSON3.Object records to Dict
-data = read_jsonl("data.jsonl"; dict_of_json=true)
-
-# Access individual records
-first_record = data[1]
-println("First record ID: ", first_record.id)
-```
-
-# Notes
-- This function loads all data into memory, so it may not be suitable for very large files.
-- For large files, consider using `stream_jsonl()` for streaming processing.
-- The function will throw an error if the JSON on any line is malformed.
-- The path must refer to an existing regular file.
-- If `dict_of_json=true`, all records must be of type `JSON3.Object`.
-
-# See Also
-- [`stream_jsonl`](@ref): For memory-efficient streaming of large JSONL files.
+- `Vector`: A vector of parsed JSON values.
"""
function read_jsonl(io::IO; dict_of_json::Bool=false)
+ Base.depwarn("`read_jsonl` is deprecated. Use `JSON.parse(io; jsonlines=true)` from JSON.jl v1 instead.", :read_jsonl)
lines = collect(eachline(io))
nonempty_lines = filter(l -> !isempty(strip(l)), lines)
isempty(nonempty_lines) && return []
@@ -68,7 +45,6 @@ function read_jsonl(io::IO; dict_of_json::Bool=false)
for (i, line) in enumerate(nonempty_lines[2:end])
results[i+1] = JSON3.read(line)
end
- @show T
if dict_of_json && T <: JSON3.Object{}
results = [_dict_of_json3(r) for r in results]
end
@@ -93,59 +69,21 @@ end
"""
stream_jsonl(source::Union{AbstractString, IO}; T::Type=JSON3.Object{}) -> Channel
-Create a lazy iterator (Channel) for reading JSON Lines files record by record.
+!!! warning "Deprecated"
+ `stream_jsonl` is deprecated. Use `JSON.parse(source; jsonlines=true)` from
+ [JSON.jl](https://github.com/JuliaIO/JSON.jl) v1 instead.
-This function returns a Channel that yields JSON objects one at a time without loading
-the entire file into memory. This is memory-efficient for processing large JSONL files.
-Each parsed record is checked to match the specified type `T` (default: `JSON3.Object{}`).
-If a record does not match `T`, an error is thrown.
+Create a lazy Channel iterator for reading JSON Lines files record by record.
# Arguments
-- `source::Union{AbstractString, IO}`: Path to the JSON Lines file to read, or an IO stream (e.g., IOBuffer, file handle).
-- `T::Type=JSON3.Object{}`: The expected type for each parsed record. Use `T=Any` to allow mixed types.
+- `source::Union{AbstractString, IO}`: Path to a JSONL file, or an IO stream.
+- `T::Type=JSON3.Object{}`: Expected type for each record. Use `T=Any` for mixed types.
# Returns
-- `Channel{T}`: A channel that yields parsed JSON objects one at a time.
-
-# Examples
-```julia
-# Process records one at a time (memory efficient)
-for record in stream_jsonl("large_file.jsonl")
- println("Processing record: ", record.id)
-end
-
-# Collect first N records
-first_10 = collect(Iterators.take(stream_jsonl("data.jsonl"), 10))
-
-# Filter and process
-filtered_records = [r for r in stream_jsonl("data.jsonl") if r.score > 0.5]
-
-# Stream from an IOBuffer
-buf = IOBuffer("$(JSON3.write(Dict(:a=>1)))\n$(JSON3.write(Dict(:a=>2)))\n")
-for record in stream_jsonl(buf)
- @show record
-end
-
-# Allow mixed types
-for record in stream_jsonl("data.jsonl"; T=Any)
- @show record
-end
-```
-
-# Notes
-- This is a lazy iterator: records are only read and parsed when requested.
-- Memory usage remains constant regardless of file size.
-- Empty lines are automatically skipped.
-- The Channel is automatically closed when the file or stream is fully read or an error occurs.
-- If JSON parsing fails on any line, the Channel will close and propagate the error.
-- For file paths, the file remains open for the lifetime of the channel.
-- For IO streams, the user is responsible for keeping the IO open while consuming the channel.
-- If a parsed record does not match `T`, an error is thrown. Use `T=Any` to allow mixed types.
-
-# See Also
-- [`read_jsonl`](@ref): For loading entire JSONL files into memory at once.
+- `Channel{T}`: A channel yielding parsed JSON objects one at a time.
"""
function stream_jsonl(io::IO; T::Type=JSON3.Object{})
+ Base.depwarn("`stream_jsonl` is deprecated. Use `JSON.parse(io; jsonlines=true)` from JSON.jl v1 instead.", :stream_jsonl)
lines = Iterators.filter(l -> !isempty(strip(l)), eachline(io))
return Channel{T}() do ch
for line in lines
@@ -160,6 +98,7 @@ end
function stream_jsonl(filename::AbstractString; T::Type=JSON3.Object{})
+ Base.depwarn("`stream_jsonl` is deprecated. Use `JSON.parse(filename; jsonlines=true)` from JSON.jl v1 instead.", :stream_jsonl)
if !isfile(filename)
throw(ArgumentError("File does not exist or is not a regular file: $filename"))
end
@@ -197,29 +136,30 @@ end
function write_jsonl(filename::AbstractString, data; kwargs...)
+ Base.depwarn("`write_jsonl` is deprecated. Use `JSON.json(filename, data; jsonlines=true)` from JSON.jl v1 instead.", :write_jsonl)
write_jsonl(filename, data, iteration_style(data); kwargs...)
end
"""
write_jsonl(filename, data; compress=false)
-Write an iterable of JSON-serializable values to a JSON Lines file.
+!!! warning "Deprecated"
+ `write_jsonl` is deprecated. Use `JSON.json(filename, data; jsonlines=true)` from
+ [JSON.jl](https://github.com/JuliaIO/JSON.jl) v1 instead.
-- `filename`: Output file path (if ends with `.gz` or `compress=true`, writes gzip-compressed)
-- `data`: An iterable (e.g., Vector, generator) of values (Dict, Array, String, Number, Bool, nothing, etc.)
+Write an iterable of JSON-serializable values to a JSON Lines file.
-Returns the filename.
+# Arguments
+- `filename`: Output file path (writes gzip-compressed if ends with `.gz` or `compress=true`)
+- `data`: An iterable of JSON-serializable values
+- `compress::Bool=false`: Force gzip compression
-# Example
-```julia
-write_jsonl("out.jsonl", [Dict("a"=>1), Dict("b"=>2)])
-write_jsonl("out.jsonl.gz", (Dict("i"=>i) for i in 1:10^6))
-```
+# Returns
+The filename.
"""
function write_jsonl(filename::AbstractString, data, ::TableIteration; compress::Bool=false)
- # @warn "Implementation for tables"
dir = dirname(filename)
- if !isdir(dir)
+ if !isempty(dir) && !isdir(dir)
throw(ArgumentError("Directory does not exist: $dir"))
end
isgz = compress || endswith(filename, ".gz")
@@ -237,9 +177,8 @@ function write_jsonl(filename::AbstractString, data, ::TableIteration; compress:
end
function write_jsonl(filename::AbstractString, data, ::DirectIteration; compress::Bool=false)
- # @warn "Implementation for direct iteration"
dir = dirname(filename)
- if !isdir(dir)
+ if !isempty(dir) && !isdir(dir)
throw(ArgumentError("Directory does not exist: $dir"))
end
isgz = compress || endswith(filename, ".gz")