refactor: streaming, security hardening, and MCP removal
Major overhaul of server architecture and security posture:

- Streaming: Unified all I/O through PIPESIZE (8192-byte) buffers. POST bodies stream via MpscReader through the save pipeline. GET content streams from disk through decompression to the client. Removed save_item_with_reader, get_item_content_info, and ChannelReader. 413 responses keep partial items (nonfatal by design).
- Security: XSS protection in all HTML pages via the html_escape crate. Security headers middleware (nosniff, frame deny, referrer policy). CORS tightened to explicit headers. Input validation for tags (256 chars), metadata (128/4096), and pagination (10k cap). Config file reads use from_utf8_lossy. Generic error messages in HTML. Diff endpoint has a 10 MB per-item cap. Added max_body_size config option.
- Panics eliminated: Path unwraps → proper error propagation. Mutex unwraps → map_err (registries) / expect with message (local).
- MCP removed: Deleted all MCP code, the rmcp dependency, and the mcp feature.
- Docs: Updated README, DESIGN, and AGENTS to reflect all changes.
This commit is contained in:
@@ -106,6 +106,8 @@ impl ItemService {
|
||||
/// Retrieves an item with its content, metadata, and tags.
|
||||
///
|
||||
/// Loads the item, its metadata/tags, and decompresses the full content.
|
||||
/// This method is intended for CLI use only and has a size guard (100MB).
|
||||
/// For larger items or server use, use `get_item_content_info_streaming`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
@@ -120,6 +122,7 @@ impl ItemService {
|
||||
///
|
||||
/// * `CoreError::ItemNotFound(id)` - If the item does not exist.
|
||||
/// * `CoreError::Io(...)` - If file read or decompression fails.
|
||||
/// * `CoreError::InvalidInput(...)` - If item exceeds 100MB size limit.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@@ -132,6 +135,9 @@ impl ItemService {
|
||||
conn: &Connection,
|
||||
id: i64,
|
||||
) -> Result<ItemWithContent, CoreError> {
|
||||
// Size limit for loading entire content into memory (100MB)
|
||||
const MAX_CONTENT_SIZE: i64 = 100 * 1024 * 1024;
|
||||
|
||||
debug!("ITEM_SERVICE: Getting item content for id: {id}");
|
||||
let item_with_meta = self.get_item(conn, id)?;
|
||||
let item_id = item_with_meta
|
||||
@@ -145,6 +151,16 @@ impl ItemService {
|
||||
)));
|
||||
}
|
||||
|
||||
// Check size guard before loading content
|
||||
if let Some(size) = item_with_meta.item.size
|
||||
&& size > MAX_CONTENT_SIZE
|
||||
{
|
||||
return Err(CoreError::InvalidInput(format!(
|
||||
"Item {} exceeds size limit ({} > {}). Use streaming API for large items.",
|
||||
item_id, size, MAX_CONTENT_SIZE
|
||||
)));
|
||||
}
|
||||
|
||||
let mut item_path = self.data_path.clone();
|
||||
item_path.push(item_id.to_string());
|
||||
debug!("ITEM_SERVICE: Reading content from path: {item_path:?}");
|
||||
@@ -164,47 +180,6 @@ impl ItemService {
|
||||
})
|
||||
}
|
||||
|
||||
/// Retrieves item content with binary detection and optional filtering.
|
||||
///
|
||||
/// Loads content, applies filters if specified, and determines MIME type and binary status.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `conn` - Database connection.
|
||||
/// * `id` - Item ID.
|
||||
/// * `filter` - Optional filter string to apply to content.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<(Vec<u8>, String, bool), CoreError>` - (content, MIME type, is_binary).
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// * `CoreError::ItemNotFound(id)` - If item not found.
|
||||
/// * Filter or compression errors.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// let (content, mime, is_binary) = item_service.get_item_content_info(&conn, 1, Some("head_lines(10)"))?;
|
||||
/// ```
|
||||
pub fn get_item_content_info(
|
||||
&self,
|
||||
conn: &Connection,
|
||||
id: i64,
|
||||
filter: Option<String>,
|
||||
) -> Result<(Vec<u8>, String, bool), CoreError> {
|
||||
// Use streaming approach to handle all filtering options consistently
|
||||
let (mut reader, mime_type, is_binary) =
|
||||
self.get_item_content_info_streaming(conn, id, filter)?;
|
||||
|
||||
// Read all the filtered content into a buffer
|
||||
let mut content = Vec::new();
|
||||
reader.read_to_end(&mut content)?;
|
||||
|
||||
Ok((content, mime_type, is_binary))
|
||||
}
|
||||
|
||||
/// Determines if item content is binary based on metadata or sampling.
|
||||
///
|
||||
/// Checks existing "text" metadata first; if absent, samples the first 8192 bytes.
|
||||
@@ -717,110 +692,6 @@ impl ItemService {
|
||||
Ok(item)
|
||||
}
|
||||
|
||||
/// Saves pre-loaded content as a new item, typically from MCP (Machine-Common-Processing) sources.
|
||||
///
|
||||
/// Bypasses streaming read, directly writes content and applies metadata/plugins.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `content` - Byte slice of content to save.
|
||||
/// * `tags` - Tags to associate.
|
||||
/// * `metadata` - Initial metadata key-value pairs.
|
||||
/// * `cmd` - Mutable command.
|
||||
/// * `settings` - Settings.
|
||||
/// * `conn` - Mutable database connection.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<ItemWithMeta, CoreError>` - The saved item with full details.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// * `CoreError::Database(...)` - If DB insert fails.
|
||||
/// * `CoreError::Io(...)` - If file write fails.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// let content = b"Hello, world!";
|
||||
/// let tags = vec!["mcp".to_string()];
|
||||
/// let meta = HashMap::from([("source".to_string(), "api".to_string())]);
|
||||
/// let item = service.save_item_from_mcp(content, &tags, &meta, &mut cmd, &settings, &mut conn)?;
|
||||
/// ```
|
||||
pub fn save_item_from_mcp(
|
||||
&self,
|
||||
content: &[u8],
|
||||
tags: &Vec<String>,
|
||||
metadata: &HashMap<String, String>,
|
||||
cmd: &mut Command,
|
||||
settings: &Settings,
|
||||
conn: &mut Connection,
|
||||
) -> Result<ItemWithMeta, CoreError> {
|
||||
debug!(
|
||||
"ITEM_SERVICE: Starting save_item_from_mcp with {} bytes, {} tags, {} metadata entries",
|
||||
content.len(),
|
||||
tags.len(),
|
||||
metadata.len()
|
||||
);
|
||||
let compression_type = CompressionType::LZ4;
|
||||
let compression_engine = get_compression_engine(compression_type.clone())?;
|
||||
|
||||
let item_id;
|
||||
let mut item;
|
||||
|
||||
{
|
||||
item = db::create_item(conn, compression_type.clone())?;
|
||||
item_id = item
|
||||
.id
|
||||
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
|
||||
debug!("ITEM_SERVICE: Created MCP item with id: {item_id}");
|
||||
|
||||
// Add tags
|
||||
for tag in tags {
|
||||
db::add_tag(conn, item_id, tag)?;
|
||||
}
|
||||
debug!("ITEM_SERVICE: Added {} tags to MCP item", tags.len());
|
||||
|
||||
// Add custom metadata
|
||||
for (key, value) in metadata {
|
||||
db::add_meta(conn, item_id, key, value)?;
|
||||
}
|
||||
debug!(
|
||||
"ITEM_SERVICE: Added {} custom metadata entries to MCP item",
|
||||
metadata.len()
|
||||
);
|
||||
}
|
||||
|
||||
let mut item_path = self.data_path.clone();
|
||||
item_path.push(item_id.to_string());
|
||||
debug!("ITEM_SERVICE: Writing MCP item to path: {item_path:?}");
|
||||
|
||||
let mut writer = compression_engine.create(item_path.clone())?;
|
||||
writer.write_all(content)?;
|
||||
drop(writer);
|
||||
|
||||
let mut plugins = self.meta_service.get_plugins(cmd, settings);
|
||||
debug!(
|
||||
"ITEM_SERVICE: Got {} configured meta plugins for MCP item",
|
||||
plugins.len()
|
||||
);
|
||||
|
||||
self.meta_service
|
||||
.initialize_plugins(&mut plugins, conn, item_id);
|
||||
self.meta_service
|
||||
.process_chunk(&mut plugins, content, conn, item_id);
|
||||
self.meta_service
|
||||
.finalize_plugins(&mut plugins, conn, item_id);
|
||||
debug!("ITEM_SERVICE: Processed MCP item through configured meta plugins");
|
||||
|
||||
item.size = Some(content.len() as i64);
|
||||
db::update_item(conn, item.clone())?;
|
||||
|
||||
debug!("ITEM_SERVICE: MCP item saved successfully");
|
||||
|
||||
self.get_item(conn, item_id)
|
||||
}
|
||||
|
||||
/// Returns a reference to the internal compression service.
|
||||
///
|
||||
/// # Returns
|
||||
|
||||
Reference in New Issue
Block a user