fix: correct text plugin word and line count tracking
Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
This commit is contained in:
@@ -120,31 +120,21 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
};
|
||||
}
|
||||
|
||||
// If we've already determined it's binary, stop processing
|
||||
if self.is_binary_content == Some(true) {
|
||||
return MetaPluginResponse {
|
||||
metadata: Vec::new(),
|
||||
is_finalized: false, // We might still want to finalize later
|
||||
};
|
||||
}
|
||||
|
||||
let mut metadata = Vec::new();
|
||||
|
||||
// Calculate how much data we can still accept
|
||||
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
|
||||
if remaining_capacity > 0 {
|
||||
// Determine how much data to copy
|
||||
// If we haven't determined if content is binary yet, build buffer and check
|
||||
if self.is_binary_content.is_none() {
|
||||
// Add data to our buffer up to max_buffer_size
|
||||
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
|
||||
let bytes_to_take = std::cmp::min(data.len(), remaining_capacity);
|
||||
|
||||
// Add data to our buffer
|
||||
self.buffer.extend_from_slice(&data[..bytes_to_take]);
|
||||
|
||||
// If we have enough data to make a binary determination, do it now
|
||||
if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) && self.is_binary_content.is_none() {
|
||||
if self.buffer.len() >= std::cmp::min(1024, self.max_buffer_size) {
|
||||
let is_binary_result = is_binary(&self.buffer);
|
||||
self.is_binary_content = Some(is_binary_result);
|
||||
|
||||
// Output text and binary status immediately
|
||||
// Output text and binary status
|
||||
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
|
||||
let binary_value = if is_binary_result { "true".to_string() } else { "false".to_string() };
|
||||
|
||||
@@ -173,24 +163,34 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// If content is text, count words and lines
|
||||
if self.is_binary_content == Some(false) {
|
||||
// If it's text, count words and lines for this chunk and stop buffering
|
||||
self.count_text_stats(&data[..bytes_to_take]);
|
||||
|
||||
// If we've reached our buffer limit, we're finalized
|
||||
if self.buffer.len() >= self.max_buffer_size {
|
||||
self.is_finalized = true;
|
||||
}
|
||||
} else {
|
||||
// Still building up buffer, count words and lines for this chunk
|
||||
self.count_text_stats(&data[..bytes_to_take]);
|
||||
}
|
||||
} else if self.is_binary_content == Some(false) {
|
||||
// We've already determined it's text, just count words and lines
|
||||
self.count_text_stats(data);
|
||||
}
|
||||
|
||||
// If we've reached our buffer limit and haven't finalized yet
|
||||
if self.buffer.len() >= self.max_buffer_size && !self.is_finalized {
|
||||
// Mark as finalized but don't output word/line counts here
|
||||
// If is_binary_content == Some(true), we should have already finalized, but just in case:
|
||||
else if self.is_binary_content == Some(true) {
|
||||
self.is_finalized = true;
|
||||
return MetaPluginResponse {
|
||||
metadata: Vec::new(),
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
|
||||
let is_finalized = self.is_finalized;
|
||||
MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized,
|
||||
is_finalized: self.is_finalized,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,7 +205,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
|
||||
let mut metadata = Vec::new();
|
||||
|
||||
// If we haven't determined binary status yet, do it now
|
||||
// If we haven't determined binary status yet, do it now with whatever we have
|
||||
if self.is_binary_content.is_none() && !self.buffer.is_empty() {
|
||||
let is_binary_result = is_binary(&self.buffer);
|
||||
self.is_binary_content = Some(is_binary_result);
|
||||
@@ -229,18 +229,25 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
) {
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
|
||||
// If it's binary, we're done
|
||||
if is_binary_result {
|
||||
self.is_finalized = true;
|
||||
return MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// If content is text and we have some data, output word and line counts
|
||||
if self.is_binary_content == Some(false) && (!self.buffer.is_empty() || self.word_count > 0 || self.line_count > 0) {
|
||||
// If content is text, output word and line counts
|
||||
if self.is_binary_content == Some(false) {
|
||||
// Process any remaining data in utf8_buffer
|
||||
if !self.utf8_buffer.is_empty() {
|
||||
self.count_text_stats(&[]);
|
||||
}
|
||||
|
||||
// If we're still in a word at the end of the stream, we've counted it correctly
|
||||
// No special handling needed for this case
|
||||
|
||||
// Output word and line counts
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_word_count",
|
||||
self.word_count.to_string(),
|
||||
|
||||
Reference in New Issue
Block a user