feat: implement streaming for large file handling
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
@@ -145,51 +145,93 @@ impl AsyncItemService {
|
||||
let db = self.db.clone();
|
||||
let item_service = self.item_service.clone();
|
||||
|
||||
// Get item content
|
||||
let content = tokio::task::spawn_blocking(move || {
|
||||
let conn = db.blocking_lock();
|
||||
let item_with_content = item_service.get_item_content(&conn, item_id)?;
|
||||
Ok::<_, CoreError>(item_with_content.content)
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
|
||||
// Use provided metadata to determine MIME type and binary status
|
||||
let mime_type = metadata
|
||||
.get("mime_type")
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|| "application/octet-stream".to_string());
|
||||
|
||||
let is_binary = if let Some(binary_val) = metadata.get("binary") {
|
||||
binary_val == "true"
|
||||
} else {
|
||||
crate::common::is_binary::is_binary(&content)
|
||||
};
|
||||
|
||||
// Check if content is binary when allow_binary is false
|
||||
if !allow_binary && is_binary {
|
||||
return Err(CoreError::InvalidInput("Binary content not allowed".to_string()));
|
||||
if !allow_binary {
|
||||
let is_binary = if let Some(binary_val) = metadata.get("binary") {
|
||||
binary_val == "true"
|
||||
} else {
|
||||
// Get binary status using streaming approach
|
||||
let (_, _, is_binary) = self.get_item_content_info_streaming(item_id).await?;
|
||||
is_binary
|
||||
};
|
||||
|
||||
if is_binary {
|
||||
return Err(CoreError::InvalidInput("Binary content not allowed".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
// Create a stream that reads only the requested portion
|
||||
let content_len = content.len() as u64;
|
||||
|
||||
// Apply offset and length constraints
|
||||
let start = std::cmp::min(offset, content_len);
|
||||
let end = if length > 0 {
|
||||
std::cmp::min(start + length, content_len)
|
||||
} else {
|
||||
content_len
|
||||
// Get a streaming reader for the content
|
||||
let (mut reader, content_len) = {
|
||||
let db = self.db.clone();
|
||||
let item_service = self.item_service.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = db.blocking_lock();
|
||||
let item_with_meta = item_service.get_item(&conn, item_id)?;
|
||||
let item_id_val = item_with_meta.item.id.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
|
||||
|
||||
let mut item_path = item_service.data_path.clone();
|
||||
item_path.push(item_id_val.to_string());
|
||||
|
||||
let reader = item_service.compression_service.stream_item_content(
|
||||
item_path,
|
||||
&item_with_meta.item.compression
|
||||
)?;
|
||||
|
||||
// Get content length from metadata
|
||||
let content_len = item_with_meta.item.size.unwrap_or(0) as u64;
|
||||
|
||||
Ok::<_, CoreError>((reader, content_len))
|
||||
})
|
||||
.await
|
||||
.unwrap()?
|
||||
};
|
||||
|
||||
let stream = if start < content_len {
|
||||
let chunk = tokio_util::bytes::Bytes::from(content[start as usize..end as usize].to_vec());
|
||||
Box::pin(tokio_stream::iter(vec![Ok(chunk)]))
|
||||
// Apply offset by reading and discarding bytes
|
||||
if offset > 0 {
|
||||
let mut remaining = offset;
|
||||
let mut buf = [0; 8192];
|
||||
while remaining > 0 {
|
||||
let to_read = std::cmp::min(remaining, buf.len() as u64);
|
||||
let n = reader.read(&mut buf[..to_read as usize])?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
remaining -= n as u64;
|
||||
}
|
||||
}
|
||||
|
||||
// Create a stream that reads the content in chunks
|
||||
let stream = tokio_stream::wrappers::ReaderStream::new(reader);
|
||||
|
||||
// If length is specified, we need to limit the stream
|
||||
let limited_stream = if length > 0 {
|
||||
Box::pin(stream.take(length as usize))
|
||||
} else {
|
||||
Box::pin(tokio_stream::iter(vec![]))
|
||||
Box::pin(stream)
|
||||
};
|
||||
|
||||
Ok((stream, mime_type))
|
||||
Ok((limited_stream, mime_type))
|
||||
}
|
||||
|
||||
pub async fn get_item_content_info_streaming(
|
||||
&self,
|
||||
item_id: i64,
|
||||
) -> Result<(Box<dyn Read + Send>, String, bool), CoreError> {
|
||||
let db = self.db.clone();
|
||||
let item_service = self.item_service.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let conn = db.blocking_lock();
|
||||
item_service.get_item_content_info_streaming(&conn, item_id)
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub async fn find_item(
|
||||
|
||||
Reference in New Issue
Block a user