From e2bef42a557730be52f4b39c6c74faa720d66387 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Tue, 26 Aug 2025 19:37:21 -0300 Subject: [PATCH] feat: fix text plugin word and line count tracking Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) --- src/meta_plugin/text.rs | 71 ++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index ff94123..27043d6 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -120,31 +120,21 @@ impl MetaPlugin for TextMetaPlugin { }; } - // If we've already determined it's binary, stop processing - if self.is_binary_content == Some(true) { - return MetaPluginResponse { - metadata: Vec::new(), - is_finalized: false, // We might still want to finalize later - }; - } - let mut metadata = Vec::new(); - // Calculate how much data we can still accept - let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); - if remaining_capacity > 0 { - // Determine how much data to copy + // If we haven't determined if content is binary yet, build buffer and check + if self.is_binary_content.is_none() { + // Add data to our buffer up to max_buffer_size + let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); let bytes_to_take = std::cmp::min(data.len(), remaining_capacity); - - // Add data to our buffer self.buffer.extend_from_slice(&data[..bytes_to_take]); // If we have enough data to make a binary determination, do it now - if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) && self.is_binary_content.is_none() { + if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) { let is_binary_result = is_binary(&self.buffer); self.is_binary_content = Some(is_binary_result); - // Output text and binary status immediately + // Output text and binary status let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; @@ -173,24 +163,34 @@ impl MetaPlugin for TextMetaPlugin { is_finalized: true, }; } - } - - // If content is text, count words and lines - if self.is_binary_content == Some(false) { + + // If it's text, count words and lines for this chunk and stop buffering + self.count_text_stats(&data[..bytes_to_take]); + + // If we've reached our buffer limit, we're finalized + if self.buffer.len() >= self.max_buffer_size { + self.is_finalized = true; + } + } else { + // Still building up buffer, count words and lines for this chunk self.count_text_stats(&data[..bytes_to_take]); } + } else if self.is_binary_content == Some(false) { + // We've already determined it's text, just count words and lines + self.count_text_stats(data); } - - // If we've reached our buffer limit and haven't finalized yet - if self.buffer.len() >= self.max_buffer_size && !self.is_finalized { - // Mark as finalized but don't output word/line counts here + // If is_binary_content == Some(true), we should have already finalized, but just in case: + else if self.is_binary_content == Some(true) { self.is_finalized = true; + return MetaPluginResponse { + metadata: Vec::new(), + is_finalized: true, + }; } - let is_finalized = self.is_finalized; MetaPluginResponse { metadata, - is_finalized, + is_finalized: self.is_finalized, } } @@ -205,7 +205,7 @@ impl MetaPlugin for TextMetaPlugin { let mut metadata = Vec::new(); - // If we haven't determined binary status yet, do it now + // If we haven't determined binary status yet, do it now with whatever we have if self.is_binary_content.is_none() && !self.buffer.is_empty() { let is_binary_result = is_binary(&self.buffer); self.is_binary_content = Some(is_binary_result); @@ -229,18 +229,25 @@ impl MetaPlugin for TextMetaPlugin { ) { metadata.push(meta_data); } + + // If it's binary, we're done + if is_binary_result { + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; + } } - // If content is text and we have some data, output word and line counts - if self.is_binary_content == Some(false) && (!self.buffer.is_empty() || self.word_count > 0 || self.line_count > 0) { + // If content is text, output word and line counts + if self.is_binary_content == Some(false) { // Process any remaining data in utf8_buffer if !self.utf8_buffer.is_empty() { self.count_text_stats(&[]); } - // If we're still in a word at the end of the stream, we've counted it correctly - // No special handling needed for this case - + // Output word and line counts if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( "text_word_count", self.word_count.to_string(),