fix: correct text plugin word and line count tracking

Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-26 19:37:21 -03:00
parent a620db8cfe
commit e2bef42a55

View File

@@ -120,31 +120,21 @@ impl MetaPlugin for TextMetaPlugin {
};
}
// If we've already determined it's binary, stop processing
if self.is_binary_content == Some(true) {
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false, // We might still want to finalize later
};
}
let mut metadata = Vec::new();
// Calculate how much data we can still accept
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 {
// Determine how much data to copy
// If we haven't determined if content is binary yet, build buffer and check
if self.is_binary_content.is_none() {
// Add data to our buffer up to max_buffer_size
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
let bytes_to_take = std::cmp::min(data.len(), remaining_capacity);
// Add data to our buffer
self.buffer.extend_from_slice(&data[..bytes_to_take]);
// If we have enough data to make a binary determination, do it now
if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) && self.is_binary_content.is_none() {
if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) {
let is_binary_result = is_binary(&self.buffer);
self.is_binary_content = Some(is_binary_result);
// Output text and binary status immediately
// Output text and binary status
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() };
@@ -173,24 +163,34 @@ impl MetaPlugin for TextMetaPlugin {
is_finalized: true,
};
}
}
// If content is text, count words and lines
if self.is_binary_content == Some(false) {
// If it's text, count words and lines for this chunk and stop buffering
self.count_text_stats(&data[..bytes_to_take]);
// If we've reached our buffer limit, we're finalized
if self.buffer.len() >= self.max_buffer_size {
self.is_finalized = true;
}
} else {
// Still building up buffer, count words and lines for this chunk
self.count_text_stats(&data[..bytes_to_take]);
}
} else if self.is_binary_content == Some(false) {
// We've already determined it's text, just count words and lines
self.count_text_stats(data);
}
// If we've reached our buffer limit and haven't finalized yet
if self.buffer.len() >= self.max_buffer_size && !self.is_finalized {
// Mark as finalized but don't output word/line counts here
// If is_binary_content == Some(true), we should have already finalized, but just in case:
else if self.is_binary_content == Some(true) {
self.is_finalized = true;
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
let is_finalized = self.is_finalized;
MetaPluginResponse {
metadata,
is_finalized,
is_finalized: self.is_finalized,
}
}
@@ -205,7 +205,7 @@ impl MetaPlugin for TextMetaPlugin {
let mut metadata = Vec::new();
// If we haven't determined binary status yet, do it now
// If we haven't determined binary status yet, do it now with whatever we have
if self.is_binary_content.is_none() && !self.buffer.is_empty() {
let is_binary_result = is_binary(&self.buffer);
self.is_binary_content = Some(is_binary_result);
@@ -229,18 +229,25 @@ impl MetaPlugin for TextMetaPlugin {
) {
metadata.push(meta_data);
}
// If it's binary, we're done
if is_binary_result {
self.is_finalized = true;
return MetaPluginResponse {
metadata,
is_finalized: true,
};
}
}
// If content is text and we have some data, output word and line counts
if self.is_binary_content == Some(false) && (!self.buffer.is_empty() || self.word_count > 0 || self.line_count > 0) {
// If content is text, output word and line counts
if self.is_binary_content == Some(false) {
// Process any remaining data in utf8_buffer
if !self.utf8_buffer.is_empty() {
self.count_text_stats(&[]);
}
// If we're still in a word at the end of the stream, we've counted it correctly
// No special handling needed for this case
// Output word and line counts
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_word_count",
self.word_count.to_string(),