feat: add filter plugin system with chained filters

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-28 20:30:37 -03:00
parent 5cfdc7e35a
commit 4cae92f7cd
7 changed files with 553 additions and 29 deletions

View File

@@ -107,6 +107,7 @@ impl ItemService {
line_start: Option<usize>,
line_end: Option<usize>,
grep: Option<String>,
filter: Option<String>,
) -> Result<(Box<dyn Read + Send>, String, bool), CoreError> {
let item_with_meta = self.get_item(conn, id)?;
let item_id = item_with_meta.item.id.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
@@ -120,35 +121,40 @@ impl ItemService {
let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
// Apply content filtering
let filtered_reader: Box<dyn Read + Send> = if let Some(pattern) = grep {
Box::new(GrepFilter::new(
reader,
pattern,
)?)
} else if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
Box::new(HeadFilter::new(
reader,
head_bytes,
head_words,
head_lines,
))
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
Box::new(TailFilter::new(
reader,
tail_bytes,
tail_words,
tail_lines,
)?)
} else if line_start.is_some() || line_end.is_some() {
Box::new(LineRangeFilter::new(
reader,
line_start,
line_end,
))
} else {
Box::new(reader)
};
// Build filter string from individual parameters (for backward compatibility)
let mut filter_parts = Vec::new();
if let Some(pattern) = grep {
filter_parts.push(format!("grep('{}')", pattern));
}
if let Some(bytes) = head_bytes {
filter_parts.push(format!("head_bytes({})", bytes));
}
if let Some(lines) = head_lines {
filter_parts.push(format!("head_lines({})", lines));
}
if let Some(bytes) = tail_bytes {
filter_parts.push(format!("tail_bytes({})", bytes));
}
if let Some(lines) = tail_lines {
filter_parts.push(format!("tail_lines({})", lines));
}
// Add other filters as needed
// Use the provided filter string if available, otherwise build from parts
let filter_str = filter.or_else(|| {
if filter_parts.is_empty() {
None
} else {
Some(filter_parts.join(" | "))
}
});
// Create filter chain
let filter_service = crate::services::filter_service::FilterService::new();
let mut filter_chain = filter_service.create_filter_chain(filter_str.as_deref())?;
// Wrap the reader with filtering
let filtered_reader = Box::new(FilteringReader::new(reader, filter_chain));
let metadata = item_with_meta.meta_as_map();
let mime_type = metadata
@@ -909,6 +915,58 @@ impl<R: Read + Send> Read for GrepFilter<R> {
}
}
/// Reader adapter that passes every chunk read from `inner` through an
/// optional filter chain and serves the filtered bytes to the caller.
///
/// Filter output is staged in `buffer`; `read` drains that buffer before
/// pulling the next chunk from `inner`.
struct FilteringReader<R: Read + Send> {
    /// Source of unfiltered bytes.
    inner: R,
    /// Chain handed to `FilterService::process_data` / `finish_processing`.
    filter_chain: Option<FilterChain>,
    /// Filtered bytes that have not been handed to the caller yet.
    buffer: Vec<u8>,
    /// Offset of the first unread byte in `buffer`.
    buffer_pos: usize,
    /// Set once `inner` has reported end of input and the chain was finished,
    /// so the chain is never finished a second time.
    finished: bool,
}

impl<R: Read + Send> FilteringReader<R> {
    /// Wraps `inner`, routing its bytes through `filter_chain`.
    fn new(inner: R, filter_chain: Option<FilterChain>) -> Self {
        Self {
            inner,
            filter_chain,
            buffer: Vec::new(),
            buffer_pos: 0,
            finished: false,
        }
    }
}

impl<R: Read + Send> Read for FilteringReader<R> {
    /// Copies filtered bytes into `buf` and returns how many were written.
    ///
    /// Returns `Ok(0)` only when `buf` is empty or when the inner reader is
    /// exhausted and all output of the filter chain has been delivered.
    ///
    /// # Errors
    ///
    /// Propagates errors from the inner reader and from the filter service.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // Refill until there is something to hand out. A chunk may legitimately
        // filter down to nothing; returning `Ok(0)` in that case would look like
        // end-of-stream to the caller and truncate the output, so keep pulling.
        while self.buffer_pos >= self.buffer.len() {
            if self.finished {
                return Ok(0);
            }

            let mut chunk = [0u8; 8192];
            let n = self.inner.read(&mut chunk)?;
            let filter_service = crate::services::filter_service::FilterService::new();

            self.buffer = if n == 0 {
                // Mark first: the chain must be finished at most once, even if
                // finishing fails and the caller reads again afterwards.
                self.finished = true;
                filter_service.finish_processing(&mut self.filter_chain)?
            } else {
                filter_service.process_data(&mut self.filter_chain, &chunk[..n])?
            };
            self.buffer_pos = 0;
        }

        // Hand out as much staged output as fits into `buf`.
        let pending = &self.buffer[self.buffer_pos..];
        let count = pending.len().min(buf.len());
        buf[..count].copy_from_slice(&pending[..count]);
        self.buffer_pos += count;
        Ok(count)
    }
}
// Line range filter implementation
struct LineRangeFilter<R: Read + Send> {
inner: R,