From 66696af67e4a06d34445fb202764ed641fdce0fc Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Thu, 28 Aug 2025 20:34:38 -0300 Subject: [PATCH] refactor: remove old filter implementations and use filter plugins Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) --- src/meta_plugin/text.rs | 165 +++++++---------------------------- src/services/item_service.rs | 125 -------------------------- 2 files changed, 33 insertions(+), 257 deletions(-) diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index 42b11f9..368f7e6 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -364,131 +364,6 @@ impl TextMetaPlugin { None } - fn process_head(&self, data: &[u8], head_bytes: Option, head_words: Option, head_lines: Option) -> Vec { - let mut result = Vec::new(); - let mut bytes_remaining = head_bytes; - let mut words_remaining = head_words; - let mut lines_remaining = head_lines; - let mut in_word = false; - - for &byte in data { - // Check if any limits are reached - if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) { - break; - } - - result.push(byte); - - // Update bytes remaining - if let Some(remaining) = &mut bytes_remaining { - *remaining -= 1; - } - - // Check for newlines - if let Some(remaining) = &mut lines_remaining { - if byte == b'\n' && *remaining > 0 { - *remaining -= 1; - } - } - - // Check for words - if let Some(remaining) = &mut words_remaining { - let is_whitespace = byte.is_ascii_whitespace(); - if in_word && is_whitespace { - in_word = false; - if *remaining > 0 { - *remaining -= 1; - } - } else if !is_whitespace { - in_word = true; - } - } - } - result - } - - fn process_tail(&self, data: &[u8], tail_bytes: Option, tail_words: Option, tail_lines: Option) -> Vec { - // For simplicity, we'll process from the end - // This implementation may not be perfect for words and lines, but it's a start - let mut result = Vec::new(); - - if let Some(bytes) = tail_bytes { 
- let start = if data.len() > bytes { data.len() - bytes } else { 0 }; - return data[start..].to_vec(); - } - - // For words and lines, we need to process from the end - // This is a simplified implementation - if let Some(lines) = tail_lines { - let mut line_count = 0; - let mut i = data.len(); - while i > 0 { - i -= 1; - if data[i] == b'\n' { - line_count += 1; - if line_count == lines { - break; - } - } - } - return data[i..].to_vec(); - } - - if let Some(words) = tail_words { - let mut word_count = 0; - let mut i = data.len(); - let mut in_word = false; - while i > 0 { - i -= 1; - let is_whitespace = data[i].is_ascii_whitespace(); - if !in_word && !is_whitespace { - in_word = true; - word_count += 1; - if word_count == words { - break; - } - } else if is_whitespace { - in_word = false; - } - } - return data[i..].to_vec(); - } - - data.to_vec() - } - - fn process_line_range(&self, data: &[u8], line_start: Option, line_end: Option) -> Vec { - let start_line = line_start.unwrap_or(1); - let end_line = line_end.unwrap_or(usize::MAX); - - let mut result = Vec::new(); - let mut current_line = 1; - let mut line_start_index = 0; - let mut in_range = false; - - for (i, &byte) in data.iter().enumerate() { - if current_line > end_line { - break; - } - - if current_line >= start_line && current_line <= end_line { - if !in_range { - in_range = true; - line_start_index = i; - } - result.push(byte); - } - - if byte == b'\n' { - current_line += 1; - if current_line > end_line { - break; - } - } - } - - result - } /// Helper method to output word and line counts fn output_word_line_counts(&mut self) -> Vec { @@ -580,13 +455,39 @@ impl MetaPlugin for TextMetaPlugin { .and_then(|v| v.as_u64()) .map(|v| v as usize); - // Apply content filtering if any of the options are present - let processed_data = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() { - self.process_head(data, head_bytes, head_words, head_lines) - } else if tail_bytes.is_some() || 
tail_words.is_some() || tail_lines.is_some() { - self.process_tail(data, tail_bytes, tail_words, tail_lines) - } else if line_start.is_some() || line_end.is_some() { - self.process_line_range(data, line_start, line_end) + // Build filter string from individual parameters + let mut filter_parts = Vec::new(); + if let Some(bytes) = head_bytes { + filter_parts.push(format!("head_bytes({})", bytes)); + } + if let Some(lines) = head_lines { + filter_parts.push(format!("head_lines({})", lines)); + } + if let Some(bytes) = tail_bytes { + filter_parts.push(format!("tail_bytes({})", bytes)); + } + if let Some(lines) = tail_lines { + filter_parts.push(format!("tail_lines({})", lines)); + } + // TODO: Add support for head_words, tail_words, line_start, line_end in filter plugins + + // Use the filter service to process data; any failure falls back to the unfiltered bytes + let processed_data = if !filter_parts.is_empty() { + let filter_str = filter_parts.join(" | "); + let filter_service = crate::services::filter_service::FilterService::new(); + // Branch on the Result: the raw-bytes fallback is not a filter chain and cannot stand in for one + match filter_service.create_filter_chain(Some(&filter_str)) { + Ok(mut filter_chain) => filter_service + .process_data(&mut filter_chain, data) + .unwrap_or_else(|e| { + log::error!("Failed to process data through filter: {}", e); + data.to_vec() + }), + Err(e) => { + log::error!("Failed to create filter chain: {}", e); + data.to_vec() + } + } } else { data.to_vec() }; diff --git a/src/services/item_service.rs b/src/services/item_service.rs index 9b4703d..6b89a61 100644 --- a/src/services/item_service.rs +++ b/src/services/item_service.rs @@ -419,131 +419,6 @@ impl ItemService { &self.data_path } - fn process_head(&self, content: &[u8], head_bytes: Option, head_words: Option, head_lines: Option) -> Vec { - let mut result = Vec::new(); - let mut bytes_remaining = head_bytes; - let mut words_remaining = head_words; - let mut lines_remaining = head_lines; - let mut in_word = false; - - for &byte in content { - 
// Check if any limits are reached - if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) { - break; - } - - result.push(byte); - - // Update bytes remaining - if let Some(remaining) = &mut bytes_remaining { - *remaining -= 1; - } - - // Check for newlines - if let Some(remaining) = &mut lines_remaining { - if byte == b'\n' && *remaining > 0 { - *remaining -= 1; - } - } - - // Check for words - if let Some(remaining) = &mut words_remaining { - let is_whitespace = byte.is_ascii_whitespace(); - if in_word && is_whitespace { - in_word = false; - if *remaining > 0 { - *remaining -= 1; - } - } else if !is_whitespace { - in_word = true; - } - } - } - result - } - - fn process_tail(&self, content: &[u8], tail_bytes: Option, tail_words: Option, tail_lines: Option) -> Vec { - // For simplicity, we'll process from the end - // This implementation may not be perfect for words and lines, but it's a start - let mut result = Vec::new(); - - if let Some(bytes) = tail_bytes { - let start = if content.len() > bytes { content.len() - bytes } else { 0 }; - return content[start..].to_vec(); - } - - // For words and lines, we need to process from the end - // This is a simplified implementation - if let Some(lines) = tail_lines { - let mut line_count = 0; - let mut i = content.len(); - while i > 0 { - i -= 1; - if content[i] == b'\n' { - line_count += 1; - if line_count == lines { - break; - } - } - } - return content[i..].to_vec(); - } - - if let Some(words) = tail_words { - let mut word_count = 0; - let mut i = content.len(); - let mut in_word = false; - while i > 0 { - i -= 1; - let is_whitespace = content[i].is_ascii_whitespace(); - if !in_word && !is_whitespace { - in_word = true; - word_count += 1; - if word_count == words { - break; - } - } else if is_whitespace { - in_word = false; - } - } - return content[i..].to_vec(); - } - - content.to_vec() - } - - fn process_line_range(&self, content: &[u8], line_start: Option, line_end: Option) -> Vec 
{ - let start_line = line_start.unwrap_or(1); - let end_line = line_end.unwrap_or(usize::MAX); - - let mut result = Vec::new(); - let mut current_line = 1; - let mut line_start_index = 0; - let mut in_range = false; - - for (i, &byte) in content.iter().enumerate() { - if current_line > end_line { - break; - } - - if current_line >= start_line && current_line <= end_line { - if !in_range { - in_range = true; - line_start_index = i; - } - result.push(byte); - } - - if byte == b'\n' { - current_line += 1; - if current_line > end_line { - break; - } - } - } - - result - } } // Head filter implementation