From 66696af67e4a06d34445fb202764ed641fdce0fc Mon Sep 17 00:00:00 2001
From: Andrew Phillips <andrew.phillips2@canada.ca>
Date: Thu, 28 Aug 2025 20:34:38 -0300
Subject: [PATCH] refactor: remove old filter implementations and use filter
 plugins

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
---
 src/meta_plugin/text.rs      | 165 +++++++----------------------------
 src/services/item_service.rs | 125 --------------------------
 2 files changed, 33 insertions(+), 257 deletions(-)
diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs
index 42b11f9..368f7e6 100644
--- a/src/meta_plugin/text.rs
+++ b/src/meta_plugin/text.rs
@@ -364,131 +364,6 @@ impl TextMetaPlugin {
         None
     }
 
-    fn process_head(&self, data: &[u8], head_bytes: Option<usize>, head_words: Option<usize>, head_lines: Option<usize>) -> Vec<u8> {
-        let mut result = Vec::new();
-        let mut bytes_remaining = head_bytes;
-        let mut words_remaining = head_words;
-        let mut lines_remaining = head_lines;
-        let mut in_word = false;
-        
-        for &byte in data {
-            // Check if any limits are reached
-            if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) {
-                break;
-            }
-            
-            result.push(byte);
-            
-            // Update bytes remaining
-            if let Some(remaining) = &mut bytes_remaining {
-                *remaining -= 1;
-            }
-            
-            // Check for newlines
-            if let Some(remaining) = &mut lines_remaining {
-                if byte == b'\n' && *remaining > 0 {
-                    *remaining -= 1;
-                }
-            }
-            
-            // Check for words
-            if let Some(remaining) = &mut words_remaining {
-                let is_whitespace = byte.is_ascii_whitespace();
-                if in_word && is_whitespace {
-                    in_word = false;
-                    if *remaining > 0 {
-                        *remaining -= 1;
-                    }
-                } else if !is_whitespace {
-                    in_word = true;
-                }
-            }
-        }
-        result
-    }
-
-    fn process_tail(&self, data: &[u8], tail_bytes: Option<usize>, tail_words: Option<usize>, tail_lines: Option<usize>) -> Vec<u8> {
-        // For simplicity, we'll process from the end
-        // This implementation may not be perfect for words and lines, but it's a start
-        let mut result = Vec::new();
-        
-        if let Some(bytes) = tail_bytes {
-            let start = if data.len() > bytes { data.len() - bytes } else { 0 };
-            return data[start..].to_vec();
-        }
-        
-        // For words and lines, we need to process from the end
-        // This is a simplified implementation
-        if let Some(lines) = tail_lines {
-            let mut line_count = 0;
-            let mut i = data.len();
-            while i > 0 {
-                i -= 1;
-                if data[i] == b'\n' {
-                    line_count += 1;
-                    if line_count == lines {
-                        break;
-                    }
-                }
-            }
-            return data[i..].to_vec();
-        }
-        
-        if let Some(words) = tail_words {
-            let mut word_count = 0;
-            let mut i = data.len();
-            let mut in_word = false;
-            while i > 0 {
-                i -= 1;
-                let is_whitespace = data[i].is_ascii_whitespace();
-                if !in_word && !is_whitespace {
-                    in_word = true;
-                    word_count += 1;
-                    if word_count == words {
-                        break;
-                    }
-                } else if is_whitespace {
-                    in_word = false;
-                }
-            }
-            return data[i..].to_vec();
-        }
-        
-        data.to_vec()
-    }
-
-    fn process_line_range(&self, data: &[u8], line_start: Option<usize>, line_end: Option<usize>) -> Vec<u8> {
-        let start_line = line_start.unwrap_or(1);
-        let end_line = line_end.unwrap_or(usize::MAX);
-        
-        let mut result = Vec::new();
-        let mut current_line = 1;
-        let mut line_start_index = 0;
-        let mut in_range = false;
-        
-        for (i, &byte) in data.iter().enumerate() {
-            if current_line > end_line {
-                break;
-            }
-            
-            if current_line >= start_line && current_line <= end_line {
-                if !in_range {
-                    in_range = true;
-                    line_start_index = i;
-                }
-                result.push(byte);
-            }
-            
-            if byte == b'\n' {
-                current_line += 1;
-                if current_line > end_line {
-                    break;
-                }
-            }
-        }
-        
-        result
-    }
     
     /// Helper method to output word and line counts
     fn output_word_line_counts(&mut self) -> Vec<crate::meta_plugin::MetaData> {
@@ -580,13 +455,39 @@ impl MetaPlugin for TextMetaPlugin {
             .and_then(|v| v.as_u64())
             .map(|v| v as usize);
         
-        // Apply content filtering if any of the options are present
-        let processed_data = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
-            self.process_head(data, head_bytes, head_words, head_lines)
-        } else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
-            self.process_tail(data, tail_bytes, tail_words, tail_lines)
-        } else if line_start.is_some() || line_end.is_some() {
-            self.process_line_range(data, line_start, line_end)
+        // Build filter string from individual parameters
+        let mut filter_parts = Vec::new();
+        if let Some(bytes) = head_bytes {
+            filter_parts.push(format!("head_bytes({})", bytes));
+        }
+        if let Some(lines) = head_lines {
+            filter_parts.push(format!("head_lines({})", lines));
+        }
+        if let Some(bytes) = tail_bytes {
+            filter_parts.push(format!("tail_bytes({})", bytes));
+        }
+        if let Some(lines) = tail_lines {
+            filter_parts.push(format!("tail_lines({})", lines));
+        }
+        // TODO: head_words, tail_words, line_start and line_end are not applied by the filter chain built here (silently ignored); add filter-plugin support for them
+        
+        // Use the filter service to process data
+        let processed_data = if !filter_parts.is_empty() {
+            let filter_str = filter_parts.join(" | ");
+            let filter_service = crate::services::filter_service::FilterService::new();
+            let mut filter_chain = filter_service.create_filter_chain(Some(&filter_str))
+                .map_err(|e| {
+                    log::error!("Failed to create filter chain: {}", e);
+                    data.to_vec()
+                })
+                .unwrap_or_else(|_| data.to_vec());
+            
+            // Process the data through the filter chain
+            filter_service.process_data(&mut filter_chain, data)
+                .unwrap_or_else(|e| {
+                    log::error!("Failed to process data through filter: {}", e);
+                    data.to_vec()
+                })
         } else {
             data.to_vec()
         };
diff --git a/src/services/item_service.rs b/src/services/item_service.rs
index 9b4703d..6b89a61 100644
--- a/src/services/item_service.rs
+++ b/src/services/item_service.rs
@@ -419,131 +419,6 @@ impl ItemService {
         &self.data_path
     }
 
-    fn process_head(&self, content: &[u8], head_bytes: Option<usize>, head_words: Option<usize>, head_lines: Option<usize>) -> Vec<u8> {
-        let mut result = Vec::new();
-        let mut bytes_remaining = head_bytes;
-        let mut words_remaining = head_words;
-        let mut lines_remaining = head_lines;
-        let mut in_word = false;
-        
-        for &byte in content {
-            // Check if any limits are reached
-            if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) {
-                break;
-            }
-            
-            result.push(byte);
-            
-            // Update bytes remaining
-            if let Some(remaining) = &mut bytes_remaining {
-                *remaining -= 1;
-            }
-            
-            // Check for newlines
-            if let Some(remaining) = &mut lines_remaining {
-                if byte == b'\n' && *remaining > 0 {
-                    *remaining -= 1;
-                }
-            }
-            
-            // Check for words
-            if let Some(remaining) = &mut words_remaining {
-                let is_whitespace = byte.is_ascii_whitespace();
-                if in_word && is_whitespace {
-                    in_word = false;
-                    if *remaining > 0 {
-                        *remaining -= 1;
-                    }
-                } else if !is_whitespace {
-                    in_word = true;
-                }
-            }
-        }
-        result
-    }
-
-    fn process_tail(&self, content: &[u8], tail_bytes: Option<usize>, tail_words: Option<usize>, tail_lines: Option<usize>) -> Vec<u8> {
-        // For simplicity, we'll process from the end
-        // This implementation may not be perfect for words and lines, but it's a start
-        let mut result = Vec::new();
-        
-        if let Some(bytes) = tail_bytes {
-            let start = if content.len() > bytes { content.len() - bytes } else { 0 };
-            return content[start..].to_vec();
-        }
-        
-        // For words and lines, we need to process from the end
-        // This is a simplified implementation
-        if let Some(lines) = tail_lines {
-            let mut line_count = 0;
-            let mut i = content.len();
-            while i > 0 {
-                i -= 1;
-                if content[i] == b'\n' {
-                    line_count += 1;
-                    if line_count == lines {
-                        break;
-                    }
-                }
-            }
-            return content[i..].to_vec();
-        }
-        
-        if let Some(words) = tail_words {
-            let mut word_count = 0;
-            let mut i = content.len();
-            let mut in_word = false;
-            while i > 0 {
-                i -= 1;
-                let is_whitespace = content[i].is_ascii_whitespace();
-                if !in_word && !is_whitespace {
-                    in_word = true;
-                    word_count += 1;
-                    if word_count == words {
-                        break;
-                    }
-                } else if is_whitespace {
-                    in_word = false;
-                }
-            }
-            return content[i..].to_vec();
-        }
-        
-        content.to_vec()
-    }
-
-    fn process_line_range(&self, content: &[u8], line_start: Option<usize>, line_end: Option<usize>) -> Vec<u8> {
-        let start_line = line_start.unwrap_or(1);
-        let end_line = line_end.unwrap_or(usize::MAX);
-        
-        let mut result = Vec::new();
-        let mut current_line = 1;
-        let mut line_start_index = 0;
-        let mut in_range = false;
-        
-        for (i, &byte) in content.iter().enumerate() {
-            if current_line > end_line {
-                break;
-            }
-            
-            if current_line >= start_line && current_line <= end_line {
-                if !in_range {
-                    in_range = true;
-                    line_start_index = i;
-                }
-                result.push(byte);
-            }
-            
-            if byte == b'\n' {
-                current_line += 1;
-                if current_line > end_line {
-                    break;
-                }
-            }
-        }
-        
-        result
-    }
 }
 
 // Head filter implementation