fix: skip_lines/skip_bytes filters producing empty output on large files

FilteringReader::read() returned Ok(0) when a filter consumed a chunk
without producing output, and callers of Read::read interpret Ok(0) as
EOF. Filters like skip_lines may need to see multiple chunks before
emitting anything, so returning 0 prematurely truncated the stream.
read() now loops until the filter produces output or the underlying
reader is truly exhausted.
This commit is contained in:
2026-03-14 16:20:30 -03:00
parent 0af74000d2
commit f2d93a2812

View File

@@ -812,16 +812,19 @@ impl<R: Read> Read for FilteringReader<R> {
return self.reader.read(buf);
}
// Read from the original reader into the reusable temp buffer
let to_read = std::cmp::min(buf.len(), self.temp_buf.len());
let bytes_read = self.reader.read(&mut self.temp_buf[..to_read])?;
// Read chunks and process through the filter chain.
// Loop because filters like skip_lines may consume entire chunks
// without producing output — that is not EOF, we must keep reading.
let chain = self.filter_chain.as_mut().unwrap();
loop {
let to_read = std::cmp::min(buf.len(), self.temp_buf.len());
let bytes_read = self.reader.read(&mut self.temp_buf[..to_read])?;
if bytes_read == 0 {
return Ok(0);
}
if bytes_read == 0 {
// True EOF from the underlying reader
return Ok(0);
}
// Process through the filter chain
if let Some(ref mut chain) = self.filter_chain {
let mut input_cursor = std::io::Cursor::new(&self.temp_buf[..bytes_read]);
chain.filter(&mut input_cursor, &mut self.buffer)?;
@@ -829,13 +832,9 @@ impl<R: Read> Read for FilteringReader<R> {
let bytes_to_copy = std::cmp::min(buf.len(), self.buffer.len());
buf[..bytes_to_copy].copy_from_slice(&self.buffer[..bytes_to_copy]);
self.buffer_pos = bytes_to_copy;
Ok(bytes_to_copy)
} else {
// No data produced by filter, signal to read more
Ok(0)
return Ok(bytes_to_copy);
}
} else {
unreachable!()
// Filter produced no output for this chunk — read another
}
}
}