refactor: change buffer to Option<Vec<u8>> and drop it after binary detection

Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-26 19:40:23 -03:00
parent e2bef42a55
commit 0ad8f3ccfa

View File

@@ -4,7 +4,7 @@ use crate::meta_plugin::{MetaPlugin, MetaPluginResponse};
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct TextMetaPlugin { pub struct TextMetaPlugin {
buffer: Vec<u8>, buffer: Option<Vec<u8>>,
max_buffer_size: usize, max_buffer_size: usize,
is_finalized: bool, is_finalized: bool,
word_count: usize, word_count: usize,
@@ -37,7 +37,7 @@ impl TextMetaPlugin {
.unwrap_or(PIPESIZE as u64) as usize; .unwrap_or(PIPESIZE as u64) as usize;
TextMetaPlugin { TextMetaPlugin {
buffer: Vec::new(), buffer: Some(Vec::new()),
max_buffer_size, max_buffer_size,
is_finalized: false, is_finalized: false,
word_count: 0, word_count: 0,
@@ -124,56 +124,60 @@ impl MetaPlugin for TextMetaPlugin {
// If we haven't determined if content is binary yet, build buffer and check // If we haven't determined if content is binary yet, build buffer and check
if self.is_binary_content.is_none() { if self.is_binary_content.is_none() {
// Add data to our buffer up to max_buffer_size if let Some(buffer) = &mut self.buffer {
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); // Add data to our buffer up to max_buffer_size
let bytes_to_take = std::cmp::min(data.len(), remaining_capacity); let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len());
self.buffer.extend_from_slice(&data[..bytes_to_take]); let bytes_to_take = std::cmp::min(data.len(), remaining_capacity);
buffer.extend_from_slice(&data[..bytes_to_take]);
// If we have enough data to make a binary determination, do it now
if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) {
let is_binary_result = is_binary(&self.buffer);
self.is_binary_content = Some(is_binary_result);
// Output text and binary status // If we have enough data to make a binary determination, do it now
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() }; if buffer.len() >= std::cmp::min(1024, self.max_buffer_size) {
let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; let is_binary_result = is_binary(buffer);
self.is_binary_content = Some(is_binary_result);
// Use process_metadata_outputs to handle output mapping
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( // Output text and binary status
"text", let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
text_value, let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() };
self.base.outputs()
) { // Use process_metadata_outputs to handle output mapping
metadata.push(meta_data); if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text",
text_value,
self.base.outputs()
) {
metadata.push(meta_data);
}
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"binary",
binary_value,
self.base.outputs()
) {
metadata.push(meta_data);
}
// If it's binary, we're done with this plugin
if is_binary_result {
self.buffer = None; // Drop the buffer
self.is_finalized = true;
return MetaPluginResponse {
metadata,
is_finalized: true,
};
}
// If it's text, count words and lines for this chunk and stop buffering
self.count_text_stats(&data[..bytes_to_take]);
// If we've reached our buffer limit, drop the buffer and finalize
if buffer.len() >= self.max_buffer_size {
self.buffer = None; // Drop the buffer
self.is_finalized = true;
}
} else {
// Still building up buffer, count words and lines for this chunk
self.count_text_stats(&data[..bytes_to_take]);
} }
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"binary",
binary_value,
self.base.outputs()
) {
metadata.push(meta_data);
}
// If it's binary, we're done with this plugin
if is_binary_result {
self.is_finalized = true;
return MetaPluginResponse {
metadata,
is_finalized: true,
};
}
// If it's text, count words and lines for this chunk and stop buffering
self.count_text_stats(&data[..bytes_to_take]);
// If we've reached our buffer limit, we're finalized
if self.buffer.len() >= self.max_buffer_size {
self.is_finalized = true;
}
} else {
// Still building up buffer, count words and lines for this chunk
self.count_text_stats(&data[..bytes_to_take]);
} }
} else if self.is_binary_content == Some(false) { } else if self.is_binary_content == Some(false) {
// We've already determined it's text, just count words and lines // We've already determined it's text, just count words and lines
@@ -206,37 +210,42 @@ impl MetaPlugin for TextMetaPlugin {
let mut metadata = Vec::new(); let mut metadata = Vec::new();
// If we haven't determined binary status yet, do it now with whatever we have // If we haven't determined binary status yet, do it now with whatever we have
if self.is_binary_content.is_none() && !self.buffer.is_empty() { if self.is_binary_content.is_none() {
let is_binary_result = is_binary(&self.buffer); if let Some(buffer) = &self.buffer {
self.is_binary_content = Some(is_binary_result); if !buffer.is_empty() {
let is_binary_result = is_binary(buffer);
// Output text and binary status self.is_binary_content = Some(is_binary_result);
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() }; // Output text and binary status
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() };
"text",
text_value, if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
self.base.outputs() "text",
) { text_value,
metadata.push(meta_data); self.base.outputs()
} ) {
metadata.push(meta_data);
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( }
"binary",
binary_value, if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
self.base.outputs() "binary",
) { binary_value,
metadata.push(meta_data); self.base.outputs()
} ) {
metadata.push(meta_data);
// If it's binary, we're done }
if is_binary_result {
self.is_finalized = true; // If it's binary, we're done
return MetaPluginResponse { if is_binary_result {
metadata, self.buffer = None; // Drop the buffer
is_finalized: true, self.is_finalized = true;
}; return MetaPluginResponse {
metadata,
is_finalized: true,
};
}
}
} }
} }
@@ -265,6 +274,9 @@ impl MetaPlugin for TextMetaPlugin {
} }
} }
// Drop the buffer since we're done with it
self.buffer = None;
// Mark as finalized // Mark as finalized
self.is_finalized = true; self.is_finalized = true;