Files
keep/src/common/binary_detection.rs
Andrew Phillips e9ab630a74 docs: Add rustdoc for filter_plugin, binary_detection, and lib.rs
Co-authored-by: aider (openai/andrew/openrouter/sonoma-sky-alpha) <aider@aider.chat>
2025-09-10 15:48:54 -03:00

131 lines
4.2 KiB
Rust

use crate::services::async_item_service::AsyncItemService;
use crate::services::error::CoreError;
use axum::http::StatusCode;
use std::collections::HashMap;
/// Check if content is binary when allow_binary is false
///
/// # Arguments
///
/// * `item_service` - Reference to the async item service
/// * `item_id` - The ID of the item to check
/// * `metadata` - Metadata associated with the item
/// * `allow_binary` - Whether binary content is allowed
///
/// # Returns
///
/// * `Result<(), StatusCode>` -
/// * `Ok(())` if binary content is allowed or content is not binary
/// * `Err(StatusCode::BAD_REQUEST)` if binary content is not allowed and content is binary
/// Check if content is binary when allow_binary is false
///
/// Validates whether binary content is permitted for the item. If not allowed and content
/// is detected as binary, returns a bad request status. Uses metadata or streams content
/// for detection if needed.
///
/// # Arguments
///
/// * `item_service` - Reference to the async item service for content access.
/// * `item_id` - The ID of the item to check.
/// * `metadata` - Metadata associated with the item (checked for "text" key).
/// * `allow_binary` - Whether binary content is allowed (bypasses check if true).
///
/// # Returns
///
/// * `Result<(), StatusCode>` -
/// * `Ok(())` if binary content is allowed or content is not binary.
/// * `Err(StatusCode::BAD_REQUEST)` if binary content is not allowed and content is binary.
///
/// # Errors
///
/// Propagates `StatusCode` for validation failures.
///
/// # Examples
///
/// ```
/// // If allow_binary = false and content is text
/// check_binary_content_allowed(&service, 1, &metadata, false)?;
/// // Succeeds
///
/// // If allow_binary = false and content is binary
/// // Returns Err(StatusCode::BAD_REQUEST)
/// ```
pub async fn check_binary_content_allowed(
item_service: &AsyncItemService,
item_id: i64,
metadata: &HashMap<String, String>,
allow_binary: bool,
) -> Result<(), StatusCode> {
if !allow_binary {
let is_binary = is_content_binary(item_service, item_id, metadata).await?;
if is_binary {
return Err(StatusCode::BAD_REQUEST);
}
}
Ok(())
}
/// Helper function to determine if content is binary
///
/// # Arguments
///
/// * `item_service` - Reference to the async item service
/// * `item_id` - The ID of the item to check
/// * `metadata` - Metadata associated with the item
///
/// # Returns
///
/// * `Result<bool, StatusCode>` -
/// * `Ok(true)` if content is binary
/// * `Ok(false)` if content is text
/// * `Err(StatusCode)` if an error occurs during checking
/// Helper function to determine if content is binary
///
/// Checks existing "text" metadata first; if absent or unset, streams and analyzes
/// the content to detect binary nature. Logs warnings on detection failures.
///
/// # Arguments
///
/// * `item_service` - Reference to the async item service for content access.
/// * `item_id` - The ID of the item to check.
/// * `metadata` - Metadata associated with the item (checked for "text" key).
///
/// # Returns
///
/// * `Result<bool, StatusCode>` -
/// * `Ok(true)` if content is binary.
/// * `Ok(false)` if content is text.
/// * `Err(StatusCode)` if an error occurs during checking (e.g., INTERNAL_SERVER_ERROR).
///
/// # Errors
///
/// * `StatusCode::INTERNAL_SERVER_ERROR` if content access fails.
///
/// # Examples
///
/// ```
/// let is_bin = is_content_binary(&service, 1, &metadata).await?;
/// assert!(is_bin == false); // For text content
/// ```
pub async fn is_content_binary(
item_service: &AsyncItemService,
item_id: i64,
metadata: &HashMap<String, String>,
) -> Result<bool, StatusCode> {
if let Some(text_val) = metadata.get("text") {
Ok(text_val == "false")
} else {
// If text metadata isn't set, we need to check the content using streaming approach
match item_service.get_item_content_info_streaming(
item_id,
None
).await {
Ok((_, _, is_binary)) => Ok(is_binary),
Err(e) => {
log::warn!("Failed to get content info for binary check for item {}: {}", item_id, e);
Err(StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
}