Skip to content

Commit

Permalink
The current parser assumes that the data fields in a (sub)chunk fill
Browse files Browse the repository at this point in the history
up the entire chunk size. I have encountered WAV files with chunk
padding, which means the next chunk starts only after a few unused
padding bytes. Before reading the next subchunk header, you should
explicitly seek to the next subchunk start as indicated by the actual
chunk size in the chunk header instead of relying on things lining up.

I've fixed the issue (dancasimiro#90), and while I was at it I've also removed the
restriction on the order of the format and data chunks.
  • Loading branch information
ATell-SoundTheory authored and mgkuhn committed Oct 3, 2020
1 parent e182794 commit 89a4036
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions src/WAV.jl
Original file line number Diff line number Diff line change
Expand Up @@ -758,20 +758,20 @@ function wavread(io::IO; subrange=(:), format="double")
sample_rate = Float32(0.0)
opt = WAVChunk[]

# Note: This assumes that the format chunk is written in the file before the data chunk. The
# specification does not require this assumption, but most real files are written that way.

# Subtract the size of the format field from chunk_size; now it holds the size
# of all the sub-chunks
chunk_size -= 4
# GitHub Issue #18: Check if there is enough data to read another chunk
subchunk_header_size = 4 + sizeof(UInt32)
fmt = WAVFormat()
data_position = 0
data_size = 0
while chunk_size >= subchunk_header_size
# Read subchunk ID and size
subchunk_id = Vector{UInt8}(undef, 4)
read!(io, subchunk_id)
subchunk_size = read_le(io, UInt32)
nextchunk_start = position(io) + subchunk_size
if subchunk_size > chunk_size
chunk_size = 0
break
Expand All @@ -784,15 +784,21 @@ function wavread(io::IO; subrange=(:), format="double")
nbits = bits_per_sample(fmt)
push!(opt, WAVChunk(fmt))
elseif subchunk_id == b"data"
if format == "size"
return convert(Int, subchunk_size / fmt.block_align), convert(Int, fmt.nchannels)
end
samples = read_data(io, subchunk_size, fmt, format, make_range(subrange))
data_position = position(io)
data_size = subchunk_size
else
subchunk_data = Vector{UInt8}(undef, subchunk_size)
read!(io, subchunk_data)
push!(opt, WAVChunk(Symbol(subchunk_id), subchunk_data))
end
seek(io, nextchunk_start)
end
if data_size > 0 && data_position > 0
seek(io, data_position)
if format == "size"
return convert(Int, data_size / fmt.block_align), convert(Int, fmt.nchannels)
end
samples = read_data(io, data_size, fmt, format, make_range(subrange))
end
return samples, sample_rate, nbits, opt
end
Expand Down

0 comments on commit 89a4036

Please sign in to comment.