feat: add content filtering options to content endpoints
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
@@ -189,9 +189,6 @@ impl AsyncItemService {
|
|||||||
line_start: Option<usize>,
|
line_start: Option<usize>,
|
||||||
line_end: Option<usize>,
|
line_end: Option<usize>,
|
||||||
) -> Result<(std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<tokio_util::bytes::Bytes, std::io::Error>> + Send>>, String), CoreError> {
|
) -> Result<(std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<tokio_util::bytes::Bytes, std::io::Error>> + Send>>, String), CoreError> {
|
||||||
let _db = self.db.clone();
|
|
||||||
let _item_service = self.item_service.clone();
|
|
||||||
|
|
||||||
// Use provided metadata to determine MIME type and binary status
|
// Use provided metadata to determine MIME type and binary status
|
||||||
let mime_type = metadata
|
let mime_type = metadata
|
||||||
.get("mime_type")
|
.get("mime_type")
|
||||||
|
|||||||
@@ -80,30 +80,15 @@ impl ItemService {
|
|||||||
line_start: Option<usize>,
|
line_start: Option<usize>,
|
||||||
line_end: Option<usize>,
|
line_end: Option<usize>,
|
||||||
) -> Result<(Vec<u8>, String, bool), CoreError> {
|
) -> Result<(Vec<u8>, String, bool), CoreError> {
|
||||||
let item_with_content = self.get_item_content(conn, id)?;
|
// Use streaming approach to handle all filtering options consistently
|
||||||
let mut content = item_with_content.content;
|
let (mut reader, mime_type, is_binary) = self.get_item_content_info_streaming(
|
||||||
|
conn, id, head_bytes, head_words, head_lines,
|
||||||
|
tail_bytes, tail_words, tail_lines, line_start, line_end
|
||||||
|
)?;
|
||||||
|
|
||||||
// Apply content filtering
|
// Read all the filtered content into a buffer
|
||||||
if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
|
let mut content = Vec::new();
|
||||||
content = self.process_head(&content, head_bytes, head_words, head_lines);
|
reader.read_to_end(&mut content)?;
|
||||||
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
|
|
||||||
content = self.process_tail(&content, tail_bytes, tail_words, tail_lines);
|
|
||||||
} else if line_start.is_some() || line_end.is_some() {
|
|
||||||
content = self.process_line_range(&content, line_start, line_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
let metadata = item_with_content.item_with_meta.meta_as_map();
|
|
||||||
let mime_type = metadata
|
|
||||||
.get("mime_type")
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.unwrap_or_else(|| "application/octet-stream".to_string());
|
|
||||||
|
|
||||||
// Check if content is binary
|
|
||||||
let is_binary = if let Some(text_val) = metadata.get("text") {
|
|
||||||
text_val == "false"
|
|
||||||
} else {
|
|
||||||
crate::common::is_binary::is_binary(&content)
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok((content, mime_type, is_binary))
|
Ok((content, mime_type, is_binary))
|
||||||
}
|
}
|
||||||
@@ -131,32 +116,32 @@ impl ItemService {
|
|||||||
let mut item_path = self.data_path.clone();
|
let mut item_path = self.data_path.clone();
|
||||||
item_path.push(item_id.to_string());
|
item_path.push(item_id.to_string());
|
||||||
|
|
||||||
let mut reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
|
let reader = self.compression_service.stream_item_content(item_path.clone(), &item_with_meta.item.compression)?;
|
||||||
|
|
||||||
// Apply content filtering
|
// Apply content filtering
|
||||||
if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
|
let filtered_reader: Box<dyn Read + Send> = if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() {
|
||||||
reader = Box::new(HeadFilter::new(
|
Box::new(HeadFilter::new(
|
||||||
reader,
|
reader,
|
||||||
head_bytes,
|
head_bytes,
|
||||||
head_words,
|
head_words,
|
||||||
head_lines,
|
head_lines,
|
||||||
));
|
))
|
||||||
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
|
} else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() {
|
||||||
// For tail operations, we need to buffer, but we can do it in a smart way
|
Box::new(TailFilter::new(
|
||||||
// Let's implement a tail filter that uses a fixed-size buffer
|
|
||||||
reader = Box::new(TailFilter::new(
|
|
||||||
reader,
|
reader,
|
||||||
tail_bytes,
|
tail_bytes,
|
||||||
tail_words,
|
tail_words,
|
||||||
tail_lines,
|
tail_lines,
|
||||||
)?);
|
)?)
|
||||||
} else if line_start.is_some() || line_end.is_some() {
|
} else if line_start.is_some() || line_end.is_some() {
|
||||||
reader = Box::new(LineRangeFilter::new(
|
Box::new(LineRangeFilter::new(
|
||||||
reader,
|
reader,
|
||||||
line_start,
|
line_start,
|
||||||
line_end,
|
line_end,
|
||||||
));
|
))
|
||||||
}
|
} else {
|
||||||
|
Box::new(reader)
|
||||||
|
};
|
||||||
|
|
||||||
let metadata = item_with_meta.meta_as_map();
|
let metadata = item_with_meta.meta_as_map();
|
||||||
let mime_type = metadata
|
let mime_type = metadata
|
||||||
@@ -178,7 +163,7 @@ impl ItemService {
|
|||||||
crate::common::is_binary::is_binary(&sample_buffer[..bytes_read])
|
crate::common::is_binary::is_binary(&sample_buffer[..bytes_read])
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok((reader, mime_type, is_binary))
|
Ok((filtered_reader, mime_type, is_binary))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_item(&self, conn: &Connection, ids: &[i64], tags: &[String], meta: &HashMap<String, String>) -> Result<ItemWithMeta, CoreError> {
|
pub fn find_item(&self, conn: &Connection, ids: &[i64], tags: &[String], meta: &HashMap<String, String>) -> Result<ItemWithMeta, CoreError> {
|
||||||
|
|||||||
Reference in New Issue
Block a user