feat: add filter plugin system with chained filters

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-28 20:30:37 -03:00
parent 5cfdc7e35a
commit 4cae92f7cd
7 changed files with 553 additions and 29 deletions

View File

@@ -0,0 +1,65 @@
use super::FilterPlugin;
use std::io::Result;
use regex::Regex;
/// Line-oriented filter that keeps only the lines matching a regular expression.
///
/// Input may arrive in arbitrary chunks. Bytes after the last newline seen so
/// far are held in `buffer` until the rest of the line arrives or `finish` is
/// called.
pub struct GrepFilter {
    /// Compiled pattern every line is tested against.
    regex: Regex,
    /// Bytes of the current line that has not been newline-terminated yet.
    buffer: Vec<u8>,
}

impl GrepFilter {
    /// Compiles `pattern` and creates a filter with an empty line buffer.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidInput` I/O error when `pattern` is not a valid regex.
    pub fn new(pattern: String) -> Result<Self> {
        let regex = Regex::new(&pattern)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;
        Ok(Self {
            regex,
            buffer: Vec::new(),
        })
    }

    /// Reports whether `line` matches the pattern.
    ///
    /// A single trailing `\n` is removed before matching so that end anchors
    /// such as `foo$` apply to the line content rather than to the terminator.
    /// Lines that are not valid UTF-8 never match.
    fn line_matches(&self, line: &[u8]) -> bool {
        let text = match line.split_last() {
            Some((&b'\n', content)) => content,
            _ => line,
        };
        std::str::from_utf8(text).map_or(false, |s| self.regex.is_match(s))
    }
}

impl FilterPlugin for GrepFilter {
    /// Appends `data` to the pending input and returns every complete,
    /// matching line (terminator included). An unterminated tail stays buffered.
    fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        self.buffer.extend_from_slice(data);

        let mut result = Vec::new();
        let mut consumed = 0;
        for line in self.buffer.split_inclusive(|&b| b == b'\n') {
            if line.last() != Some(&b'\n') {
                // Trailing partial line: wait for more input.
                break;
            }
            consumed += line.len();
            if self.line_matches(line) {
                result.extend_from_slice(line);
            }
        }

        // Only drop the consumed prefix once no slice into the buffer is live.
        self.buffer.drain(..consumed);
        Ok(result)
    }

    /// Tests the final unterminated line, if any, and returns it when it matches.
    /// The buffer is empty afterwards.
    fn finish(&mut self) -> Result<Vec<u8>> {
        let rest = std::mem::take(&mut self.buffer);
        if !rest.is_empty() && self.line_matches(&rest) {
            Ok(rest)
        } else {
            Ok(Vec::new())
        }
    }
}

View File

@@ -0,0 +1,86 @@
use super::FilterPlugin;
use std::io::Result;
/// Passes through at most the first `count` bytes of the stream and drops the rest.
pub struct HeadBytesFilter {
    /// Number of bytes that may still be emitted.
    remaining: usize,
}

impl HeadBytesFilter {
    /// Creates a filter that lets the first `count` bytes through.
    pub fn new(count: usize) -> Self {
        Self { remaining: count }
    }
}

impl FilterPlugin for HeadBytesFilter {
    /// Returns the prefix of `data` that still fits into the byte budget.
    fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        // Once the budget is exhausted `allowed` is 0 and the output is empty.
        let allowed = data.len().min(self.remaining);
        let (kept, _dropped) = data.split_at(allowed);
        self.remaining -= allowed;
        Ok(kept.to_vec())
    }

    /// Nothing is ever held back, so there is nothing to flush.
    fn finish(&mut self) -> Result<Vec<u8>> {
        Ok(Vec::new())
    }
}
/// Passes through at most the first `count` lines of the stream.
///
/// A line is emitted once its terminating `\n` has been seen. An unterminated
/// final line is emitted by `finish` if the line budget is not used up yet.
pub struct HeadLinesFilter {
    /// Number of newline-terminated lines that may still be emitted.
    remaining: usize,
    /// Bytes of the current line that has not been newline-terminated yet.
    buffer: Vec<u8>,
}

impl HeadLinesFilter {
    /// Creates a filter that lets the first `count` lines through.
    pub fn new(count: usize) -> Self {
        Self {
            remaining: count,
            buffer: Vec::new(),
        }
    }
}

impl FilterPlugin for HeadLinesFilter {
    /// Returns the complete lines contained in the pending input plus `data`,
    /// up to the remaining line budget. Input past the budget is discarded
    /// immediately instead of being buffered.
    fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        let mut result = Vec::new();
        for piece in data.split_inclusive(|&b| b == b'\n') {
            if self.remaining == 0 {
                // Budget exhausted: nothing after this point can be emitted.
                break;
            }
            if piece.last() == Some(&b'\n') {
                // The piece completes a line; emit it with any buffered prefix.
                result.append(&mut self.buffer);
                result.extend_from_slice(piece);
                self.remaining -= 1;
            } else {
                // Unterminated tail of the chunk: keep it until the line ends.
                self.buffer.extend_from_slice(piece);
            }
        }
        Ok(result)
    }

    /// Emits the unterminated final line if the line budget still allows it.
    /// The buffer is empty afterwards.
    fn finish(&mut self) -> Result<Vec<u8>> {
        let pending = std::mem::take(&mut self.buffer);
        if self.remaining > 0 {
            Ok(pending)
        } else {
            Ok(Vec::new())
        }
    }
}

View File

@@ -0,0 +1,88 @@
use std::io::{Read, Result};
use regex::Regex;
use ringbuf::HeapRb;
pub mod head;
pub mod tail;
pub mod grep;
pub mod skip;
/// A streaming byte filter that can be placed in a [`FilterChain`].
///
/// Input is delivered chunk by chunk through `process`; `finish` is called
/// once no more input will arrive. Implementations must be `Send`.
pub trait FilterPlugin: Send {
/// Feeds one chunk of input to the filter and returns the bytes it emits in
/// response. The result may be empty when the filter is still buffering.
fn process(&mut self, data: &[u8]) -> Result<Vec<u8>>;
/// Signals the end of input and returns any bytes the filter was holding back.
fn finish(&mut self) -> Result<Vec<u8>>;
}
/// An ordered pipeline of [`FilterPlugin`]s.
///
/// The output of each plugin is the input of the next one; the output of the
/// last plugin is the output of the chain.
pub struct FilterChain {
    /// Plugins in the order in which data flows through them.
    plugins: Vec<Box<dyn FilterPlugin>>,
}

impl Default for FilterChain {
    fn default() -> Self {
        Self::new()
    }
}

impl FilterChain {
    /// Creates an empty chain, which passes data through unchanged.
    pub fn new() -> Self {
        Self {
            plugins: Vec::new(),
        }
    }

    /// Appends `plugin` as the last stage of the chain.
    pub fn add_plugin(&mut self, plugin: Box<dyn FilterPlugin>) {
        self.plugins.push(plugin);
    }

    /// Runs one chunk of input through every stage and returns what the last
    /// stage emits.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by a plugin.
    pub fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        let mut current = data.to_vec();
        for plugin in &mut self.plugins {
            current = plugin.process(&current)?;
        }
        Ok(current)
    }

    /// Flushes every stage in order and returns the final output of the chain.
    ///
    /// Bytes flushed by an upstream stage are still input for the stages after
    /// it, so they are passed through each downstream `process` before that
    /// stage is flushed itself. Without this, a buffering stage such as a tail
    /// filter would bypass every filter that follows it.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by a plugin.
    pub fn finish(&mut self) -> Result<Vec<u8>> {
        let mut carried: Vec<u8> = Vec::new();
        for plugin in &mut self.plugins {
            let mut output = if carried.is_empty() {
                Vec::new()
            } else {
                plugin.process(&carried)?
            };
            output.extend(plugin.finish()?);
            carried = output;
        }
        Ok(carried)
    }
}
/// Parses a `|`-separated filter specification into a [`FilterChain`].
///
/// Each stage is written as `name(argument)`. Supported names are `grep`
/// (regex argument, optionally wrapped in single or double quotes) and
/// `head_bytes`, `head_lines`, `tail_bytes`, `tail_lines`, `skip_bytes`,
/// `skip_lines` (non-negative integer argument). Whitespace around stages and
/// around numeric arguments is ignored, and empty stages are skipped.
///
/// A `|` inside a quoted argument belongs to the argument, so
/// `grep('warn|error') | head_lines(10)` is a two-stage chain.
///
/// # Errors
///
/// Returns an `InvalidInput` I/O error for an unknown stage, a numeric
/// argument that does not parse as `usize`, or any error reported by a
/// filter constructor.
pub fn parse_filter_string(filter_str: &str) -> Result<FilterChain> {
    let mut chain = FilterChain::new();
    for part in split_pipeline(filter_str) {
        let part = part.trim();
        if part.is_empty() {
            continue;
        }
        // Every stage has the shape `name(argument)`; the name is the text
        // before the first `(` and the argument ends at the final `)`.
        let (name, arg) = part
            .strip_suffix(')')
            .and_then(|call| call.split_once('('))
            .ok_or_else(|| unknown_filter(part))?;
        match name {
            "grep" => {
                chain.add_plugin(Box::new(grep::GrepFilter::new(unquote(arg).to_string())?));
            }
            "head_bytes" => {
                chain.add_plugin(Box::new(head::HeadBytesFilter::new(parse_count(arg)?)));
            }
            "head_lines" => {
                chain.add_plugin(Box::new(head::HeadLinesFilter::new(parse_count(arg)?)));
            }
            "tail_bytes" => {
                chain.add_plugin(Box::new(tail::TailBytesFilter::new(parse_count(arg)?)?));
            }
            "tail_lines" => {
                chain.add_plugin(Box::new(tail::TailLinesFilter::new(parse_count(arg)?)?));
            }
            "skip_bytes" => {
                chain.add_plugin(Box::new(skip::SkipBytesFilter::new(parse_count(arg)?)));
            }
            "skip_lines" => {
                chain.add_plugin(Box::new(skip::SkipLinesFilter::new(parse_count(arg)?)));
            }
            _ => return Err(unknown_filter(part)),
        }
    }
    Ok(chain)
}

/// Builds the error returned for a stage that is not a known `name(argument)` call.
fn unknown_filter(part: &str) -> std::io::Error {
    std::io::Error::new(
        std::io::ErrorKind::InvalidInput,
        format!("Unknown filter: {}", part),
    )
}

/// Parses a numeric filter argument, ignoring surrounding whitespace.
fn parse_count(arg: &str) -> Result<usize> {
    arg.trim()
        .parse()
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))
}

/// Removes one pair of matching quotes wrapping `arg`.
///
/// When `arg` is not wrapped in a matching pair, stray quote characters at
/// either end are trimmed, as earlier versions of the parser did.
fn unquote(arg: &str) -> &str {
    for quote in ['\'', '"'] {
        if let Some(inner) = arg.strip_prefix(quote).and_then(|s| s.strip_suffix(quote)) {
            return inner;
        }
    }
    arg.trim_matches(|c| c == '\'' || c == '"')
}

/// Splits `spec` at every `|` that is not inside a quoted filter argument.
///
/// A quoted argument starts with a quote directly after `(` and ends with the
/// same quote directly before `)`. Quote characters anywhere else (for example
/// the apostrophe in `grep(don't)`) are ordinary text. If a quoted argument is
/// never closed, the specification is split at every `|`.
fn split_pipeline(spec: &str) -> Vec<&str> {
    let mut parts = Vec::new();
    let mut part_start = 0;
    let mut open_quote: Option<char> = None;
    let mut previous: Option<char> = None;
    let mut chars = spec.char_indices().peekable();
    // `while let` rather than `for`: closing a quote needs one char of lookahead.
    while let Some((index, ch)) = chars.next() {
        match open_quote {
            Some(quote) => {
                let closes_argument = match chars.peek() {
                    Some(&(_, next)) => next == ')',
                    None => false,
                };
                if ch == quote && closes_argument {
                    open_quote = None;
                }
            }
            None => {
                if ch == '|' {
                    parts.push(&spec[part_start..index]);
                    part_start = index + 1;
                } else if (ch == '\'' || ch == '"') && previous == Some('(') {
                    open_quote = Some(ch);
                }
            }
        }
        previous = Some(ch);
    }
    if open_quote.is_some() {
        return spec.split('|').collect();
    }
    parts.push(&spec[part_start..]);
    parts
}

98
src/filter_plugin/skip.rs Normal file
View File

@@ -0,0 +1,98 @@
use super::FilterPlugin;
use std::io::Result;
/// Drops the first `count` bytes of the stream and passes everything after them.
pub struct SkipBytesFilter {
    /// Number of bytes that still have to be dropped.
    remaining: usize,
}

impl SkipBytesFilter {
    /// Creates a filter that discards the first `count` bytes.
    pub fn new(count: usize) -> Self {
        Self { remaining: count }
    }
}

impl FilterPlugin for SkipBytesFilter {
    /// Returns the part of `data` that lies beyond the bytes still to be skipped.
    fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        // Skip as much of this chunk as the outstanding count requires; with
        // nothing outstanding this is 0 and the whole chunk passes through.
        let dropped = self.remaining.min(data.len());
        let (_skipped, kept) = data.split_at(dropped);
        self.remaining -= dropped;
        Ok(kept.to_vec())
    }

    /// Nothing is ever held back, so there is nothing to flush.
    fn finish(&mut self) -> Result<Vec<u8>> {
        Ok(Vec::new())
    }
}
/// Drops the first `count` newline-terminated lines of the stream and passes
/// everything after them through unchanged.
///
/// No buffering is needed: bytes of a skipped line are simply discarded, and
/// once enough newlines have been seen every further byte is emitted as is.
pub struct SkipLinesFilter {
    /// Number of line terminators that still have to be consumed.
    remaining: usize,
}

impl SkipLinesFilter {
    /// Creates a filter that discards the first `count` lines.
    pub fn new(count: usize) -> Self {
        Self { remaining: count }
    }
}

impl FilterPlugin for SkipLinesFilter {
    /// Returns the part of `data` that follows the lines still to be skipped.
    ///
    /// A skipped line may span several chunks; none of its bytes are ever emitted.
    fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
        let mut rest = data;
        while self.remaining > 0 {
            match rest.iter().position(|&b| b == b'\n') {
                Some(newline) => {
                    self.remaining -= 1;
                    rest = &rest[newline + 1..];
                }
                // The whole remainder belongs to a line that is being skipped.
                None => return Ok(Vec::new()),
            }
        }
        Ok(rest.to_vec())
    }

    /// Nothing is ever held back, so there is nothing to flush.
    fn finish(&mut self) -> Result<Vec<u8>> {
        Ok(Vec::new())
    }
}

View File

@@ -0,0 +1,95 @@
use super::FilterPlugin;
use std::io::Result;
use ringbuf::HeapRb;
pub struct TailBytesFilter {
ring_buffer: HeapRb<u8>,
count: usize,
}
impl TailBytesFilter {
pub fn new(count: usize) -> Result<Self> {
Ok(Self {
ring_buffer: HeapRb::new(count),
count,
})
}
}
impl FilterPlugin for TailBytesFilter {
fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
for &byte in data {
let _ = self.ring_buffer.push(byte);
}
Ok(Vec::new())
}
fn finish(&mut self) -> Result<Vec<u8>> {
let mut result = Vec::with_capacity(self.ring_buffer.len());
for byte in self.ring_buffer.iter() {
result.push(*byte);
}
Ok(result)
}
}
pub struct TailLinesFilter {
ring_buffer: HeapRb<u8>,
count: usize,
lines_found: usize,
}
impl TailLinesFilter {
pub fn new(count: usize) -> Result<Self> {
Ok(Self {
ring_buffer: HeapRb::new(count * 256), // Estimate 256 bytes per line
count,
lines_found: 0,
})
}
}
impl FilterPlugin for TailLinesFilter {
fn process(&mut self, data: &[u8]) -> Result<Vec<u8>> {
for &byte in data {
let _ = self.ring_buffer.push(byte);
if byte == b'\n' {
self.lines_found += 1;
}
}
Ok(Vec::new())
}
fn finish(&mut self) -> Result<Vec<u8>> {
// Count lines in the buffer to find where to start
let mut lines_to_keep = std::cmp::min(self.count, self.lines_found);
let mut bytes_to_keep = 0;
let mut lines_seen = 0;
// Iterate backwards to find the starting point
for i in (0..self.ring_buffer.len()).rev() {
let index = (self.ring_buffer.write_index() as isize - 1 - i as isize)
.rem_euclid(self.ring_buffer.capacity() as isize) as usize;
let byte = self.ring_buffer[index];
if byte == b'\n' {
lines_seen += 1;
if lines_seen > lines_to_keep {
break;
}
}
bytes_to_keep += 1;
}
// Extract the relevant bytes
let start_index = self.ring_buffer.len() - bytes_to_keep;
let mut result = Vec::with_capacity(bytes_to_keep);
for i in start_index..self.ring_buffer.len() {
let index = (self.ring_buffer.write_index() as isize - (self.ring_buffer.len() - i) as isize)
.rem_euclid(self.ring_buffer.capacity() as isize) as usize;
result.push(self.ring_buffer[index]);
}
Ok(result)
}
}