refactor: reduce code duplication in filter and item services
Co-authored-by: aider (openai/andrew/openrouter/mistralai/mistral-medium-3.1) <aider@aider.chat>
This commit is contained in:
@@ -7,6 +7,7 @@ use crate::services::types::{ItemWithContent, ItemWithMeta};
|
||||
use crate::db::{self, Meta};
|
||||
use crate::compression_engine::{get_compression_engine, CompressionType};
|
||||
use crate::modes::common::settings_compression_type;
|
||||
use crate::filter_plugin::FilterChain;
|
||||
use clap::Command;
|
||||
use log::debug;
|
||||
use ringbuf::HeapRb;
|
||||
@@ -16,6 +17,47 @@ use std::fs;
|
||||
use std::io::{IsTerminal, Read, Write};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// A reader that applies a filter chain to the data as it's read
|
||||
struct FilteringReader<R: Read> {
|
||||
reader: R,
|
||||
filter_chain: Option<FilterChain>,
|
||||
}
|
||||
|
||||
impl<R: Read> FilteringReader<R> {
|
||||
pub fn new(reader: R, filter_chain: Option<FilterChain>) -> Self {
|
||||
Self { reader, filter_chain }
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Read for FilteringReader<R> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
// Read from the original reader
|
||||
let mut temp_buf = vec![0; buf.len()];
|
||||
let bytes_read = self.reader.read(&mut temp_buf)?;
|
||||
|
||||
if bytes_read == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Process through the filter chain if it exists
|
||||
if let Some(chain) = &mut self.filter_chain {
|
||||
match chain.process(&temp_buf[..bytes_read]) {
|
||||
Ok(filtered_data) => {
|
||||
let filtered_len = filtered_data.len();
|
||||
if filtered_len > 0 {
|
||||
buf[..std::cmp::min(filtered_len, buf.len())].copy_from_slice(&filtered_data[..std::cmp::min(filtered_len, buf.len())]);
|
||||
}
|
||||
Ok(filtered_len)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
} else {
|
||||
buf[..bytes_read].copy_from_slice(&temp_buf[..bytes_read]);
|
||||
Ok(bytes_read)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ItemService {
|
||||
data_path: PathBuf,
|
||||
compression_service: CompressionService,
|
||||
@@ -94,6 +136,70 @@ impl ItemService {
|
||||
Ok((content, mime_type, is_binary))
|
||||
}
|
||||
|
||||
/// Helper method to create a filter chain from parameters
|
||||
fn create_filter_chain(
|
||||
&self,
|
||||
grep: Option<String>,
|
||||
head_bytes: Option<usize>,
|
||||
head_lines: Option<usize>,
|
||||
tail_bytes: Option<usize>,
|
||||
tail_lines: Option<usize>,
|
||||
filter: Option<String>,
|
||||
) -> Result<Option<FilterChain>, CoreError> {
|
||||
// Build filter string from individual parameters (for backward compatibility)
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(pattern) = grep {
|
||||
filter_parts.push(format!("grep('{}')", pattern));
|
||||
}
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({})", lines));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({})", lines));
|
||||
}
|
||||
|
||||
// Use the provided filter string if available, otherwise build from parts
|
||||
let filter_str = filter.or_else(|| {
|
||||
if filter_parts.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(filter_parts.join(" | "))
|
||||
}
|
||||
});
|
||||
|
||||
// Create filter chain
|
||||
let filter_service = crate::services::filter_service::FilterService::new();
|
||||
filter_service.create_filter_chain(filter_str.as_deref())
|
||||
}
|
||||
|
||||
/// Helper method to determine if content is binary
|
||||
fn is_content_binary(
|
||||
&self,
|
||||
item_path: PathBuf,
|
||||
compression: &str,
|
||||
metadata: &HashMap<String, String>,
|
||||
) -> Result<bool, CoreError> {
|
||||
// Check if we already have text metadata
|
||||
if let Some(text_val) = metadata.get("text") {
|
||||
return Ok(text_val == "false");
|
||||
}
|
||||
|
||||
// Read only the first 8192 bytes for binary detection
|
||||
let mut sample_reader = self.compression_service.stream_item_content(
|
||||
item_path,
|
||||
compression
|
||||
)?;
|
||||
let mut sample_buffer = vec![0; 8192];
|
||||
let bytes_read = sample_reader.read(&mut sample_buffer)?;
|
||||
Ok(crate::common::is_binary::is_binary(&sample_buffer[..bytes_read]))
|
||||
}
|
||||
|
||||
pub fn get_item_content_info_streaming(
|
||||
&self,
|
||||
conn: &Connection,
|
||||
@@ -119,40 +225,16 @@ impl ItemService {
|
||||
let mut item_path = self.data_path.clone();
|
||||
item_path.push(item_id.to_string());
|
||||
|
||||
let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
|
||||
|
||||
// Build filter string from individual parameters (for backward compatibility)
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(pattern) = grep {
|
||||
filter_parts.push(format!("grep('{}')", pattern));
|
||||
}
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({})", lines));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({})", lines));
|
||||
}
|
||||
// Add other filters as needed
|
||||
|
||||
// Use the provided filter string if available, otherwise build from parts
|
||||
let filter_str = filter.or_else(|| {
|
||||
if filter_parts.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(filter_parts.join(" | "))
|
||||
}
|
||||
});
|
||||
|
||||
let reader = self.compression_service.stream_item_content(
|
||||
item_path.clone(),
|
||||
&item_with_meta.item.compression
|
||||
)?;
|
||||
|
||||
// Create filter chain
|
||||
let filter_service = crate::services::filter_service::FilterService::new();
|
||||
let mut filter_chain = filter_service.create_filter_chain(filter_str.as_deref())?;
|
||||
|
||||
let filter_chain = self.create_filter_chain(
|
||||
grep, head_bytes, head_lines, tail_bytes, tail_lines, filter
|
||||
)?;
|
||||
|
||||
// Wrap the reader with filtering
|
||||
let filtered_reader = Box::new(FilteringReader::new(reader, filter_chain));
|
||||
|
||||
@@ -162,19 +244,12 @@ impl ItemService {
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "application/octet-stream".to_string());
|
||||
|
||||
// Check if content is binary using only the first 8192 bytes
|
||||
let is_binary = if let Some(text_val) = metadata.get("text") {
|
||||
text_val == "false"
|
||||
} else {
|
||||
// Read only the first 8192 bytes for binary detection
|
||||
let mut sample_reader = self.compression_service.stream_item_content(
|
||||
item_path,
|
||||
&item_with_meta.item.compression
|
||||
)?;
|
||||
let mut sample_buffer = vec![0; 8192];
|
||||
let bytes_read = sample_reader.read(&mut sample_buffer)?;
|
||||
crate::common::is_binary::is_binary(&sample_buffer[..bytes_read])
|
||||
};
|
||||
// Check if content is binary
|
||||
let is_binary = self.is_content_binary(
|
||||
item_path,
|
||||
&item_with_meta.item.compression,
|
||||
&metadata
|
||||
)?;
|
||||
|
||||
Ok((filtered_reader, mime_type, is_binary))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user