feat: add grep option to content endpoints

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-28 20:16:24 -03:00
parent a419ae960c
commit ee555b253f
2 changed files with 131 additions and 5 deletions

View File

@@ -79,11 +79,12 @@ impl ItemService {
tail_lines: Option<usize>,
line_start: Option<usize>,
line_end: Option<usize>,
grep: Option<String>,
) -> Result<(Vec<u8>, String, bool), CoreError> {
// Use streaming approach to handle all filtering options consistently
let (mut reader, mime_type, is_binary) = self.get_item_content_info_streaming(
conn, id, head_bytes, head_words, head_lines,
tail_bytes, tail_words, tail_lines, line_start, line_end
tail_bytes, tail_words, tail_lines, line_start, line_end, grep
)?;
// Read all the filtered content into a buffer
@@ -105,6 +106,7 @@ impl ItemService {
tail_lines: Option<usize>,
line_start: Option<usize>,
line_end: Option<usize>,
grep: Option<String>,
) -> Result<(Box<dyn Read + Send>, String, bool), CoreError> {
let item_with_meta = self.get_item(conn, id)?;
let item_id = item_with_meta.item.id.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
@@ -119,7 +121,12 @@ impl ItemService {
let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
// Apply content filtering
let filtered_reader: Box<dyn Read + Send> = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
let filtered_reader: Box<dyn Read + Send> = if let Some(pattern) = grep {
Box::new(GrepFilter::new(
reader,
pattern,
)?)
} else if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
Box::new(HeadFilter::new(
reader,
head_bytes,
@@ -789,6 +796,119 @@ impl<R: Read + Send> Read for TailFilter<R> {
}
}
// Grep filter implementation
struct GrepFilter<R: Read + Send> {
inner: R,
regex: regex::Regex,
buffer: Vec<u8>,
buffer_pos: usize,
is_eof: bool,
// For line-based matching
current_line: Vec<u8>,
matched_lines: Vec<Vec<u8>>,
output_pos: usize,
}
impl<R: Read + Send> GrepFilter<R> {
fn new(inner: R, pattern: String) -> std::io::Result<Self> {
let regex = regex::Regex::new(&pattern)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e.to_string()))?;
Ok(Self {
inner,
regex,
buffer: Vec::new(),
buffer_pos: 0,
is_eof: false,
current_line: Vec::new(),
matched_lines: Vec::new(),
output_pos: 0,
})
}
fn fill_buffer(&mut self) -> std::io::Result<()> {
if self.buffer_pos >= self.buffer.len() && !self.is_eof {
// Read more data
let mut temp_buf = [0; 8192];
let n = self.inner.read(&mut temp_buf)?;
if n == 0 {
self.is_eof = true;
// Process any remaining line
if !self.current_line.is_empty() {
self.process_line();
}
return Ok(());
}
self.buffer.extend_from_slice(&temp_buf[..n]);
}
Ok(())
}
fn process_line(&mut self) {
if !self.current_line.is_empty() {
// Convert to string and check against regex
if let Ok(line_str) = std::str::from_utf8(&self.current_line) {
if self.regex.is_match(line_str) {
self.matched_lines.push(self.current_line.clone());
}
}
self.current_line.clear();
}
}
}
impl<R: Read + Send> Read for GrepFilter<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
// First, process the input to find matching lines
while !self.is_eof || self.buffer_pos < self.buffer.len() {
self.fill_buffer()?;
while self.buffer_pos < self.buffer.len() {
let byte = self.buffer[self.buffer_pos];
self.buffer_pos += 1;
self.current_line.push(byte);
if byte == b'\n' {
self.process_line();
}
}
}
// Now output the matched lines
if self.output_pos >= self.matched_lines.iter().map(|l| l.len()).sum() {
return Ok(0);
}
let mut bytes_written = 0;
for line in &self.matched_lines {
if bytes_written >= buf.len() {
break;
}
let line_len = line.len();
if self.output_pos < line_len {
let bytes_to_copy = std::cmp::min(buf.len() - bytes_written, line_len - self.output_pos);
buf[bytes_written..bytes_written + bytes_to_copy]
.copy_from_slice(&line[self.output_pos..self.output_pos + bytes_to_copy]);
bytes_written += bytes_to_copy;
self.output_pos += bytes_to_copy;
} else {
self.output_pos -= line_len;
}
// If we've moved to the next line, reset output_pos
if self.output_pos >= line_len {
self.output_pos = 0;
} else {
break;
}
}
Ok(bytes_written)
}
}
// Line range filter implementation
struct LineRangeFilter<R: Read + Send> {
inner: R,