refactor: reduce code duplication in filter and item services

Co-authored-by: aider (openai/andrew/openrouter/mistralai/mistral-medium-3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-28 20:51:39 -03:00
parent 5542f5592a
commit 4c8466bb21
5 changed files with 268 additions and 151 deletions

View File

@@ -361,15 +361,9 @@ impl AsyncItemService {
tags: Vec<String>,
meta: HashMap<String, String>,
) -> Result<ItemWithMeta, CoreError> {
let db = self.db.clone();
let item_service = self.item_service.clone();
tokio::task::spawn_blocking(move || {
let conn = db.blocking_lock();
item_service.find_item(&conn, &ids, &tags, &meta)
})
.await
.unwrap()
self.execute_blocking(|conn, item_service| {
item_service.find_item(conn, &ids, &tags, &meta)
}).await
}
pub async fn list_items(
@@ -377,27 +371,15 @@ impl AsyncItemService {
tags: Vec<String>,
meta: HashMap<String, String>,
) -> Result<Vec<ItemWithMeta>, CoreError> {
let db = self.db.clone();
let item_service = self.item_service.clone();
tokio::task::spawn_blocking(move || {
let conn = db.blocking_lock();
item_service.list_items(&conn, &tags, &meta)
})
.await
.unwrap()
self.execute_blocking(|conn, item_service| {
item_service.list_items(conn, &tags, &meta)
}).await
}
pub async fn delete_item(&self, id: i64) -> Result<(), CoreError> {
let db = self.db.clone();
let item_service = self.item_service.clone();
tokio::task::spawn_blocking(move || {
let mut conn = db.blocking_lock();
item_service.delete_item(&mut conn, id)
})
.await
.unwrap()
self.execute_blocking_mut(|conn, item_service| {
item_service.delete_item(conn, id)
}).await
}
pub async fn save_item_from_mcp(

View File

@@ -7,6 +7,7 @@ use crate::services::types::{ItemWithContent, ItemWithMeta};
use crate::db::{self, Meta};
use crate::compression_engine::{get_compression_engine, CompressionType};
use crate::modes::common::settings_compression_type;
use crate::filter_plugin::FilterChain;
use clap::Command;
use log::debug;
use ringbuf::HeapRb;
@@ -16,6 +17,47 @@ use std::fs;
use std::io::{IsTerminal, Read, Write};
use std::path::PathBuf;
/// A reader that applies a filter chain to the data as it's read.
///
/// When no filter chain is configured the wrapper is a transparent
/// pass-through to the inner reader. When a chain is present, each chunk read
/// from the inner reader is run through `FilterChain::process` and the
/// resulting bytes are served to the caller, buffering whatever does not fit
/// into the caller's buffer for subsequent `read` calls.
struct FilteringReader<R: Read> {
    reader: R,
    filter_chain: Option<FilterChain>,
    /// Filtered bytes produced by the chain but not yet handed to the caller.
    pending: Vec<u8>,
    /// Offset of the first unconsumed byte in `pending`.
    pending_pos: usize,
}

impl<R: Read> FilteringReader<R> {
    /// Wraps `reader`, optionally filtering its bytes through `filter_chain`.
    pub fn new(reader: R, filter_chain: Option<FilterChain>) -> Self {
        Self {
            reader,
            filter_chain,
            pending: Vec::new(),
            pending_pos: 0,
        }
    }
}

impl<R: Read> Read for FilteringReader<R> {
    /// Reads filtered bytes into `buf`.
    ///
    /// Returns the number of bytes written to `buf`, which never exceeds
    /// `buf.len()`. `Ok(0)` is returned only when `buf` is empty or the inner
    /// reader has reached end of stream.
    ///
    /// # Errors
    ///
    /// Propagates any error from the inner reader or from the filter chain.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // Without a filter there is nothing to transform: read straight into
        // the caller's buffer instead of going through a temporary copy.
        let chain = match self.filter_chain.as_mut() {
            Some(chain) => chain,
            None => return self.reader.read(buf),
        };

        if self.pending_pos >= self.pending.len() {
            self.pending.clear();
            self.pending_pos = 0;

            let mut scratch = vec![0u8; buf.len()];
            // A filter may legitimately emit nothing for a chunk (e.g. no
            // matching data). Returning 0 in that case would look like EOF to
            // the caller, so keep pulling input until the filter produces
            // output or the source is exhausted.
            while self.pending.is_empty() {
                let bytes_read = self.reader.read(&mut scratch)?;
                if bytes_read == 0 {
                    // NOTE(review): if FilterChain buffers data internally
                    // (e.g. for tail-style filters) it may need a flush/finish
                    // step here — confirm against the FilterChain API.
                    return Ok(0);
                }
                let filtered_data = chain.process(&scratch[..bytes_read])?;
                let filtered_len = filtered_data.len();
                self.pending
                    .extend_from_slice(&filtered_data[..filtered_len]);
            }
        }

        // Hand out as much buffered output as fits; the remainder stays in
        // `pending` for the next call rather than being dropped.
        let available = &self.pending[self.pending_pos..];
        let count = std::cmp::min(available.len(), buf.len());
        buf[..count].copy_from_slice(&available[..count]);
        self.pending_pos += count;
        Ok(count)
    }
}
pub struct ItemService {
data_path: PathBuf,
compression_service: CompressionService,
@@ -94,6 +136,70 @@ impl ItemService {
Ok((content, mime_type, is_binary))
}
/// Builds the optional filter chain for a content request.
///
/// An explicit `filter` expression takes precedence. Otherwise the individual
/// options (kept for backward compatibility) are turned into stages in the
/// fixed order `grep`, `head_bytes`, `head_lines`, `tail_bytes`, `tail_lines`
/// and joined with `" | "`. When neither is supplied, no filter expression is
/// passed to the filter service.
///
/// # Errors
///
/// Returns whatever error `FilterService::create_filter_chain` reports for
/// the resulting expression.
fn create_filter_chain(
    &self,
    grep: Option<String>,
    head_bytes: Option<usize>,
    head_lines: Option<usize>,
    tail_bytes: Option<usize>,
    tail_lines: Option<usize>,
    filter: Option<String>,
) -> Result<Option<FilterChain>, CoreError> {
    let filter_str = match filter {
        // A caller-provided expression always wins over the legacy options.
        Some(explicit) => Some(explicit),
        None => {
            // NOTE(review): the grep pattern is interpolated verbatim; a
            // pattern containing `'` may not survive parsing — confirm how
            // the filter expression grammar handles quoting.
            let stages: Vec<String> = vec![
                grep.map(|pattern| format!("grep('{}')", pattern)),
                head_bytes.map(|count| format!("head_bytes({})", count)),
                head_lines.map(|count| format!("head_lines({})", count)),
                tail_bytes.map(|count| format!("tail_bytes({})", count)),
                tail_lines.map(|count| format!("tail_lines({})", count)),
            ]
            .into_iter()
            .flatten()
            .collect();

            if stages.is_empty() {
                None
            } else {
                Some(stages.join(" | "))
            }
        }
    };

    crate::services::filter_service::FilterService::new()
        .create_filter_chain(filter_str.as_deref())
}
/// Determines whether an item's content should be treated as binary.
///
/// If `metadata` carries a `"text"` entry, that decides the answer: the
/// content is binary exactly when the value is `"false"`. Otherwise up to the
/// first 8192 bytes of the item's content stream are sampled and passed to
/// `crate::common::is_binary::is_binary`.
///
/// # Errors
///
/// Returns an error if the content stream cannot be opened or read.
fn is_content_binary(
    &self,
    item_path: PathBuf,
    compression: &str,
    metadata: &HashMap<String, String>,
) -> Result<bool, CoreError> {
    /// Maximum number of leading bytes inspected for binary detection.
    const SAMPLE_BYTES: usize = 8192;

    // Check if we already have text metadata
    if let Some(text_val) = metadata.get("text") {
        return Ok(text_val == "false");
    }

    let sample_reader = self
        .compression_service
        .stream_item_content(item_path, compression)?;

    // A single `read` call may return fewer bytes than requested even when
    // more data is available, so keep reading until the sample is full or the
    // stream ends. The usize -> u64 conversion of the constant is lossless.
    let mut sample_buffer = Vec::with_capacity(SAMPLE_BYTES);
    sample_reader
        .take(SAMPLE_BYTES as u64)
        .read_to_end(&mut sample_buffer)?;

    Ok(crate::common::is_binary::is_binary(&sample_buffer[..]))
}
pub fn get_item_content_info_streaming(
&self,
conn: &Connection,
@@ -119,40 +225,16 @@ impl ItemService {
let mut item_path = self.data_path.clone();
item_path.push(item_id.to_string());
let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
// Build filter string from individual parameters (for backward compatibility)
let mut filter_parts = Vec::new();
if let Some(pattern) = grep {
filter_parts.push(format!("grep('{}')", pattern));
}
if let Some(bytes) = head_bytes {
filter_parts.push(format!("head_bytes({})", bytes));
}
if let Some(lines) = head_lines {
filter_parts.push(format!("head_lines({})", lines));
}
if let Some(bytes) = tail_bytes {
filter_parts.push(format!("tail_bytes({})", bytes));
}
if let Some(lines) = tail_lines {
filter_parts.push(format!("tail_lines({})", lines));
}
// Add other filters as needed
// Use the provided filter string if available, otherwise build from parts
let filter_str = filter.or_else(|| {
if filter_parts.is_empty() {
None
} else {
Some(filter_parts.join(" | "))
}
});
let reader = self.compression_service.stream_item_content(
item_path.clone(),
&item_with_meta.item.compression
)?;
// Create filter chain
let filter_service = crate::services::filter_service::FilterService::new();
let mut filter_chain = filter_service.create_filter_chain(filter_str.as_deref())?;
let filter_chain = self.create_filter_chain(
grep, head_bytes, head_lines, tail_bytes, tail_lines, filter
)?;
// Wrap the reader with filtering
let filtered_reader = Box::new(FilteringReader::new(reader, filter_chain));
@@ -162,19 +244,12 @@ impl ItemService {
.map(|s| s.to_string())
.unwrap_or_else(|| "application/octet-stream".to_string());
// Check if content is binary using only the first 8192 bytes
let is_binary = if let Some(text_val) = metadata.get("text") {
text_val == "false"
} else {
// Read only the first 8192 bytes for binary detection
let mut sample_reader = self.compression_service.stream_item_content(
item_path,
&item_with_meta.item.compression
)?;
let mut sample_buffer = vec![0; 8192];
let bytes_read = sample_reader.read(&mut sample_buffer)?;
crate::common::is_binary::is_binary(&sample_buffer[..bytes_read])
};
// Check if content is binary
let is_binary = self.is_content_binary(
item_path,
&item_with_meta.item.compression,
&metadata
)?;
Ok((filtered_reader, mime_type, is_binary))
}