From 0ad8f3ccfac4672f239537512d36e2c4472ef212 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Tue, 26 Aug 2025 19:40:23 -0300 Subject: [PATCH] refactor: change buffer to Option> and drop it after binary detection Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) --- src/meta_plugin/text.rs | 174 +++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 81 deletions(-) diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index 27043d6..ed15dc9 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -4,7 +4,7 @@ use crate::meta_plugin::{MetaPlugin, MetaPluginResponse}; #[derive(Debug, Clone, Default)] pub struct TextMetaPlugin { - buffer: Vec, + buffer: Option>, max_buffer_size: usize, is_finalized: bool, word_count: usize, @@ -37,7 +37,7 @@ impl TextMetaPlugin { .unwrap_or(PIPESIZE as u64) as usize; TextMetaPlugin { - buffer: Vec::new(), + buffer: Some(Vec::new()), max_buffer_size, is_finalized: false, word_count: 0, @@ -124,56 +124,60 @@ impl MetaPlugin for TextMetaPlugin { // If we haven't determined if content is binary yet, build buffer and check if self.is_binary_content.is_none() { - // Add data to our buffer up to max_buffer_size - let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); - let bytes_to_take = std::cmp::min(data.len(), remaining_capacity); - self.buffer.extend_from_slice(&data[..bytes_to_take]); - - // If we have enough data to make a binary determination, do it now - if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) { - let is_binary_result = is_binary(&self.buffer); - self.is_binary_content = Some(is_binary_result); + if let Some(buffer) = &mut self.buffer { + // Add data to our buffer up to max_buffer_size + let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len()); + let bytes_to_take = std::cmp::min(data.len(), remaining_capacity); + buffer.extend_from_slice(&data[..bytes_to_take]); - // Output text and binary status - let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; - let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; - - // Use process_metadata_outputs to handle output mapping - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text", - text_value, - self.base.outputs() - ) { - metadata.push(meta_data); + // If we have enough data to make a binary determination, do it now + if buffer.len() >= std::cmp::min(1024, self.max_buffer_size) { + let is_binary_result = is_binary(buffer); + self.is_binary_content = Some(is_binary_result); + + // Output text and binary status + let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; + let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; + + // Use process_metadata_outputs to handle output mapping + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "text", + text_value, + self.base.outputs() + ) { + metadata.push(meta_data); + } + + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "binary", + binary_value, + self.base.outputs() + ) { + metadata.push(meta_data); + } + + // If it's binary, we're done with this plugin + if is_binary_result { + self.buffer = None; // Drop the buffer + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; + } + + // If it's text, count words and lines for this chunk and stop buffering + self.count_text_stats(&data[..bytes_to_take]); + + // If we've reached our buffer limit, drop the buffer and finalize + if buffer.len() >= self.max_buffer_size { + self.buffer = None; // Drop the buffer + self.is_finalized = true; + } + } else { + // Still building up buffer, count words and lines for this chunk + self.count_text_stats(&data[..bytes_to_take]); } - - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "binary", - binary_value, - self.base.outputs() - ) { - metadata.push(meta_data); - } - - // If it's binary, we're done with this plugin - if is_binary_result { - self.is_finalized = true; - return MetaPluginResponse { - metadata, - is_finalized: true, - }; - } - - // If it's text, count words and lines for this chunk and stop buffering - self.count_text_stats(&data[..bytes_to_take]); - - // If we've reached our buffer limit, we're finalized - if self.buffer.len() >= self.max_buffer_size { - self.is_finalized = true; - } - } else { - // Still building up buffer, count words and lines for this chunk - self.count_text_stats(&data[..bytes_to_take]); } } else if self.is_binary_content == Some(false) { // We've already determined it's text, just count words and lines @@ -206,37 +210,42 @@ impl MetaPlugin for TextMetaPlugin { let mut metadata = Vec::new(); // If we haven't determined binary status yet, do it now with whatever we have - if self.is_binary_content.is_none() && !self.buffer.is_empty() { - let is_binary_result = is_binary(&self.buffer); - self.is_binary_content = Some(is_binary_result); - - // Output text and binary status - let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; - let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; - - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text", - text_value, - self.base.outputs() - ) { - metadata.push(meta_data); - } - - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "binary", - binary_value, - self.base.outputs() - ) { - metadata.push(meta_data); - } - - // If it's binary, we're done - if is_binary_result { - self.is_finalized = true; - return MetaPluginResponse { - metadata, - is_finalized: true, - }; + if self.is_binary_content.is_none() { + if let Some(buffer) = &self.buffer { + if !buffer.is_empty() { + let is_binary_result = is_binary(buffer); + self.is_binary_content = Some(is_binary_result); + + // Output text and binary status + let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; + let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; + + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "text", + text_value, + self.base.outputs() + ) { + metadata.push(meta_data); + } + + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "binary", + binary_value, + self.base.outputs() + ) { + metadata.push(meta_data); + } + + // If it's binary, we're done + if is_binary_result { + self.buffer = None; // Drop the buffer + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; + } + } } } @@ -265,6 +274,9 @@ impl MetaPlugin for TextMetaPlugin { } } + // Drop the buffer since we're done with it + self.buffer = None; + // Mark as finalized self.is_finalized = true;