feat: add content filtering options to content endpoints

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-28 20:13:06 -03:00
parent 9ef4ba2abe
commit a419ae960c
2 changed files with 21 additions and 39 deletions

View File

@@ -189,9 +189,6 @@ impl AsyncItemService {
line_start: Option<usize>, line_start: Option<usize>,
line_end: Option<usize>, line_end: Option<usize>,
) -> Result<(std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<tokio_util::bytes::Bytes, std::io::Error>> + Send>>, String), CoreError> { ) -> Result<(std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<tokio_util::bytes::Bytes, std::io::Error>> + Send>>, String), CoreError> {
let _db = self.db.clone();
let _item_service = self.item_service.clone();
// Use provided metadata to determine MIME type and binary status // Use provided metadata to determine MIME type and binary status
let mime_type = metadata let mime_type = metadata
.get("mime_type") .get("mime_type")

View File

@@ -80,31 +80,16 @@ impl ItemService {
line_start: Option<usize>, line_start: Option<usize>,
line_end: Option<usize>, line_end: Option<usize>,
) -> Result<(Vec<u8>, String, bool), CoreError> { ) -> Result<(Vec<u8>, String, bool), CoreError> {
let item_with_content = self.get_item_content(conn, id)?; // Use streaming approach to handle all filtering options consistently
let mut content = item_with_content.content; let (mut reader, mime_type, is_binary) = self.get_item_content_info_streaming(
conn, id, head_bytes, head_words, head_lines,
tail_bytes, tail_words, tail_lines, line_start, line_end
)?;
// Read all the filtered content into a buffer
let mut content = Vec::new();
reader.read_to_end(&mut content)?;
// Apply content filtering
if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
content = self.process_head(&content, head_bytes, head_words, head_lines);
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
content = self.process_tail(&content, tail_bytes, tail_words, tail_lines);
} else if line_start.is_some() || line_end.is_some() {
content = self.process_line_range(&content, line_start, line_end);
}
let metadata = item_with_content.item_with_meta.meta_as_map();
let mime_type = metadata
.get("mime_type")
.map(|s| s.to_string())
.unwrap_or_else(|| "application/octet-stream".to_string());
// Check if content is binary
let is_binary = if let Some(text_val) = metadata.get("text") {
text_val == "false"
} else {
crate::common::is_binary::is_binary(&content)
};
Ok((content, mime_type, is_binary)) Ok((content, mime_type, is_binary))
} }
@@ -131,32 +116,32 @@ impl ItemService {
let mut item_path = self.data_path.clone(); let mut item_path = self.data_path.clone();
item_path.push(item_id.to_string()); item_path.push(item_id.to_string());
let mut reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?; let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
// Apply content filtering // Apply content filtering
if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() { let filtered_reader: Box<dyn Read + Send> = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
reader = Box::new(HeadFilter::new( Box::new(HeadFilter::new(
reader, reader,
head_bytes, head_bytes,
head_words, head_words,
head_lines, head_lines,
)); ))
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() { } else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
// For tail operations, we need to buffer, but we can do it in a smart way Box::new(TailFilter::new(
// Let's implement a tail filter that uses a fixed-size buffer
reader = Box::new(TailFilter::new(
reader, reader,
tail_bytes, tail_bytes,
tail_words, tail_words,
tail_lines, tail_lines,
)?); )?)
} else if line_start.is_some() || line_end.is_some() { } else if line_start.is_some() || line_end.is_some() {
reader = Box::new(LineRangeFilter::new( Box::new(LineRangeFilter::new(
reader, reader,
line_start, line_start,
line_end, line_end,
)); ))
} } else {
Box::new(reader)
};
let metadata = item_with_meta.meta_as_map(); let metadata = item_with_meta.meta_as_map();
let mime_type = metadata let mime_type = metadata
@@ -178,7 +163,7 @@ impl ItemService {
crate::common::is_binary::is_binary(&sample_buffer[..bytes_read]) crate::common::is_binary::is_binary(&sample_buffer[..bytes_read])
}; };
Ok((reader, mime_type, is_binary)) Ok((filtered_reader, mime_type, is_binary))
} }
pub fn find_item(&self, conn: &Connection, ids: &[i64], tags: &[String], meta: &HashMap<String, String>) -> Result<ItemWithMeta, CoreError> { pub fn find_item(&self, conn: &Connection, ids: &[i64], tags: &[String], meta: &HashMap<String, String>) -> Result<ItemWithMeta, CoreError> {