feat: add head/tail/line range processing for text meta plugin
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
@@ -363,6 +363,132 @@ impl TextMetaPlugin {
|
|||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn process_head(&self, data: &[u8], head_bytes: Option<usize>, head_words: Option<usize>, head_lines: Option<usize>) -> Vec<u8> {
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut bytes_remaining = head_bytes;
|
||||||
|
let mut words_remaining = head_words;
|
||||||
|
let mut lines_remaining = head_lines;
|
||||||
|
let mut in_word = false;
|
||||||
|
|
||||||
|
for &byte in data {
|
||||||
|
// Check if any limits are reached
|
||||||
|
if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.push(byte);
|
||||||
|
|
||||||
|
// Update bytes remaining
|
||||||
|
if let Some(remaining) = &mut bytes_remaining {
|
||||||
|
*remaining -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for newlines
|
||||||
|
if let Some(remaining) = &mut lines_remaining {
|
||||||
|
if byte == b'\n' && *remaining > 0 {
|
||||||
|
*remaining -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for words
|
||||||
|
if let Some(remaining) = &mut words_remaining {
|
||||||
|
let is_whitespace = byte.is_ascii_whitespace();
|
||||||
|
if in_word && is_whitespace {
|
||||||
|
in_word = false;
|
||||||
|
if *remaining > 0 {
|
||||||
|
*remaining -= 1;
|
||||||
|
}
|
||||||
|
} else if !is_whitespace {
|
||||||
|
in_word = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_tail(&self, data: &[u8], tail_bytes: Option<usize>, tail_words: Option<usize>, tail_lines: Option<usize>) -> Vec<u8> {
|
||||||
|
// For simplicity, we'll process from the end
|
||||||
|
// This implementation may not be perfect for words and lines, but it's a start
|
||||||
|
let mut result = Vec::new();
|
||||||
|
|
||||||
|
if let Some(bytes) = tail_bytes {
|
||||||
|
let start = if data.len() > bytes { data.len() - bytes } else { 0 };
|
||||||
|
return data[start..].to_vec();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For words and lines, we need to process from the end
|
||||||
|
// This is a simplified implementation
|
||||||
|
if let Some(lines) = tail_lines {
|
||||||
|
let mut line_count = 0;
|
||||||
|
let mut i = data.len();
|
||||||
|
while i > 0 {
|
||||||
|
i -= 1;
|
||||||
|
if data[i] == b'\n' {
|
||||||
|
line_count += 1;
|
||||||
|
if line_count == lines {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return data[i..].to_vec();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(words) = tail_words {
|
||||||
|
let mut word_count = 0;
|
||||||
|
let mut i = data.len();
|
||||||
|
let mut in_word = false;
|
||||||
|
while i > 0 {
|
||||||
|
i -= 1;
|
||||||
|
let is_whitespace = data[i].is_ascii_whitespace();
|
||||||
|
if !in_word && !is_whitespace {
|
||||||
|
in_word = true;
|
||||||
|
word_count += 1;
|
||||||
|
if word_count == words {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if is_whitespace {
|
||||||
|
in_word = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return data[i..].to_vec();
|
||||||
|
}
|
||||||
|
|
||||||
|
data.to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn process_line_range(&self, data: &[u8], line_start: Option<usize>, line_end: Option<usize>) -> Vec<u8> {
|
||||||
|
let start_line = line_start.unwrap_or(1);
|
||||||
|
let end_line = line_end.unwrap_or(usize::MAX);
|
||||||
|
|
||||||
|
let mut result = Vec::new();
|
||||||
|
let mut current_line = 1;
|
||||||
|
let mut line_start_index = 0;
|
||||||
|
let mut in_range = false;
|
||||||
|
|
||||||
|
for (i, &byte) in data.iter().enumerate() {
|
||||||
|
if current_line > end_line {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if current_line >= start_line && current_line <= end_line {
|
||||||
|
if !in_range {
|
||||||
|
in_range = true;
|
||||||
|
line_start_index = i;
|
||||||
|
}
|
||||||
|
result.push(byte);
|
||||||
|
}
|
||||||
|
|
||||||
|
if byte == b'\n' {
|
||||||
|
current_line += 1;
|
||||||
|
if current_line > end_line {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
/// Helper method to output word and line counts
|
/// Helper method to output word and line counts
|
||||||
fn output_word_line_counts(&mut self) -> Vec<crate::meta_plugin::MetaData> {
|
fn output_word_line_counts(&mut self) -> Vec<crate::meta_plugin::MetaData> {
|
||||||
@@ -427,13 +553,51 @@ impl MetaPlugin for TextMetaPlugin {
|
|||||||
|
|
||||||
let mut metadata = Vec::new();
|
let mut metadata = Vec::new();
|
||||||
|
|
||||||
|
// Check if we have head/tail/line range options that would affect processing
|
||||||
|
// These options come from the base plugin's options
|
||||||
|
let head_bytes = self.base.options.get("head_bytes")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let head_words = self.base.options.get("head_words")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let head_lines = self.base.options.get("head_lines")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_bytes = self.base.options.get("tail_bytes")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_words = self.base.options.get("tail_words")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_lines = self.base.options.get("tail_lines")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let line_start = self.base.options.get("line_start")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let line_end = self.base.options.get("line_end")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
|
||||||
|
// Apply content filtering if any of the options are present
|
||||||
|
let processed_data = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
|
||||||
|
self.process_head(data, head_bytes, head_words, head_lines)
|
||||||
|
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
|
||||||
|
self.process_tail(data, tail_bytes, tail_words, tail_lines)
|
||||||
|
} else if line_start.is_some() || line_end.is_some() {
|
||||||
|
self.process_line_range(data, line_start, line_end)
|
||||||
|
} else {
|
||||||
|
data.to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
// If we haven't determined if content is binary yet, build buffer and check
|
// If we haven't determined if content is binary yet, build buffer and check
|
||||||
if self.is_binary_content.is_none() {
|
if self.is_binary_content.is_none() {
|
||||||
let should_finalize = if let Some(ref mut buffer) = self.buffer {
|
let should_finalize = if let Some(ref mut buffer) = self.buffer {
|
||||||
// Add data to our buffer up to max_buffer_size
|
// Add processed data to our buffer up to max_buffer_size
|
||||||
let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len());
|
let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len());
|
||||||
let bytes_to_take = std::cmp::min(data.len(), remaining_capacity);
|
let bytes_to_take = std::cmp::min(processed_data.len(), remaining_capacity);
|
||||||
buffer.extend_from_slice(&data[..bytes_to_take]);
|
buffer.extend_from_slice(&processed_data[..bytes_to_take]);
|
||||||
|
|
||||||
// If we have enough data to make a binary determination, do it now
|
// If we have enough data to make a binary determination, do it now
|
||||||
let buffer_len = buffer.len();
|
let buffer_len = buffer.len();
|
||||||
@@ -455,7 +619,7 @@ impl MetaPlugin for TextMetaPlugin {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If it's text, count words and lines for this chunk
|
// If it's text, count words and lines for this chunk
|
||||||
self.count_text_stats(&data[..bytes_to_take]);
|
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||||
|
|
||||||
// If we've reached our buffer limit, drop the buffer to save memory
|
// If we've reached our buffer limit, drop the buffer to save memory
|
||||||
// But don't finalize yet - we need to keep counting words and lines
|
// But don't finalize yet - we need to keep counting words and lines
|
||||||
@@ -465,7 +629,7 @@ impl MetaPlugin for TextMetaPlugin {
|
|||||||
false // Never finalize here for text content
|
false // Never finalize here for text content
|
||||||
} else {
|
} else {
|
||||||
// Still building up buffer, count words and lines for this chunk
|
// Still building up buffer, count words and lines for this chunk
|
||||||
self.count_text_stats(&data[..bytes_to_take]);
|
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -480,7 +644,7 @@ impl MetaPlugin for TextMetaPlugin {
|
|||||||
}
|
}
|
||||||
} else if self.is_binary_content == Some(false) {
|
} else if self.is_binary_content == Some(false) {
|
||||||
// We've already determined it's text, just count words and lines
|
// We've already determined it's text, just count words and lines
|
||||||
self.count_text_stats(data);
|
self.count_text_stats(&processed_data);
|
||||||
}
|
}
|
||||||
// If is_binary_content == Some(true), we should have already finalized, but just in case:
|
// If is_binary_content == Some(true), we should have already finalized, but just in case:
|
||||||
else if self.is_binary_content == Some(true) {
|
else if self.is_binary_content == Some(true) {
|
||||||
@@ -508,13 +672,49 @@ impl MetaPlugin for TextMetaPlugin {
|
|||||||
|
|
||||||
let mut metadata = Vec::new();
|
let mut metadata = Vec::new();
|
||||||
|
|
||||||
|
// Check if we have head/tail/line range options
|
||||||
|
let head_bytes = self.base.options.get("head_bytes")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let head_words = self.base.options.get("head_words")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let head_lines = self.base.options.get("head_lines")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_bytes = self.base.options.get("tail_bytes")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_words = self.base.options.get("tail_words")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let tail_lines = self.base.options.get("tail_lines")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let line_start = self.base.options.get("line_start")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
let line_end = self.base.options.get("line_end")
|
||||||
|
.and_then(|v| v.as_u64())
|
||||||
|
.map(|v| v as usize);
|
||||||
|
|
||||||
// If we haven't determined binary status yet, do it now with whatever we have
|
// If we haven't determined binary status yet, do it now with whatever we have
|
||||||
if self.is_binary_content.is_none() {
|
if self.is_binary_content.is_none() {
|
||||||
if let Some(buffer) = &self.buffer {
|
if let Some(buffer) = &self.buffer {
|
||||||
if !buffer.is_empty() {
|
if !buffer.is_empty() {
|
||||||
// Clone the buffer data for binary detection to avoid borrowing conflicts
|
// Apply content filtering to the buffer if needed
|
||||||
let buffer_clone = buffer.clone();
|
let processed_buffer = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
|
||||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&buffer_clone);
|
self.process_head(buffer, head_bytes, head_words, head_lines)
|
||||||
|
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
|
||||||
|
self.process_tail(buffer, tail_bytes, tail_words, tail_lines)
|
||||||
|
} else if line_start.is_some() || line_end.is_some() {
|
||||||
|
self.process_line_range(buffer, line_start, line_end)
|
||||||
|
} else {
|
||||||
|
buffer.clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Clone the processed buffer data for binary detection
|
||||||
|
let (binary_metadata, is_binary) = self.perform_binary_detection(&processed_buffer);
|
||||||
metadata.extend(binary_metadata);
|
metadata.extend(binary_metadata);
|
||||||
self.is_binary_content = Some(is_binary);
|
self.is_binary_content = Some(is_binary);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user