From f2d93a28129cd3776ae6776a5ba412f5afd3fc57 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Sat, 14 Mar 2026 16:20:30 -0300 Subject: [PATCH] fix: skip_lines/skip_bytes filters producing empty output on large files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FilteringReader::read() returned Ok(0) (EOF) when a filter consumed a chunk without producing output. Filters like skip_lines need to see multiple chunks before outputting anything — returning 0 prematurely truncated the stream. Loop until the filter produces output or the underlying reader is truly exhausted. --- src/services/item_service.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/services/item_service.rs b/src/services/item_service.rs index 4401f86..c3d4285 100644 --- a/src/services/item_service.rs +++ b/src/services/item_service.rs @@ -812,16 +812,19 @@ impl Read for FilteringReader { return self.reader.read(buf); } - // Read from the original reader into the reusable temp buffer - let to_read = std::cmp::min(buf.len(), self.temp_buf.len()); - let bytes_read = self.reader.read(&mut self.temp_buf[..to_read])?; + // Read chunks and process through the filter chain. + // Loop because filters like skip_lines may consume entire chunks + // without producing output — that is not EOF, we must keep reading. + let chain = self.filter_chain.as_mut().unwrap(); + loop { + let to_read = std::cmp::min(buf.len(), self.temp_buf.len()); + let bytes_read = self.reader.read(&mut self.temp_buf[..to_read])?; - if bytes_read == 0 { - return Ok(0); - } + if bytes_read == 0 { + // True EOF from the underlying reader + return Ok(0); + } - // Process through the filter chain - if let Some(ref mut chain) = self.filter_chain { let mut input_cursor = std::io::Cursor::new(&self.temp_buf[..bytes_read]); chain.filter(&mut input_cursor, &mut self.buffer)?; @@ -829,13 +832,9 @@ impl Read for FilteringReader { let bytes_to_copy = std::cmp::min(buf.len(), self.buffer.len()); buf[..bytes_to_copy].copy_from_slice(&self.buffer[..bytes_to_copy]); self.buffer_pos = bytes_to_copy; - Ok(bytes_to_copy) - } else { - // No data produced by filter, signal to read more - Ok(0) + return Ok(bytes_to_copy); } - } else { - unreachable!() + // Filter produced no output for this chunk — read another } } }