refactor: streaming, security hardening, and MCP removal

Major overhaul of server architecture and security posture:

- Streaming: Unified all I/O through PIPESIZE (8192-byte) buffers.
  POST bodies stream via MpscReader through the save pipeline. GET
  content streams from disk via decompression to client. Removed
  save_item_with_reader, get_item_content_info, ChannelReader.
  413 (Payload Too Large) responses keep partial items (non-fatal by design).

- Security: XSS protection in all HTML pages via html_escape crate.
  Security headers middleware (nosniff, frame deny, referrer policy).
  CORS tightened to explicit headers. Input validation for tags
  (256 chars), metadata (128/4096), pagination (10k cap). Config
  file reads use from_utf8_lossy. Generic error messages in HTML.
  Diff endpoint has 10 MB per-item cap. max_body_size config option.

- Panics eliminated: Path unwraps → proper error propagation.
  Mutex unwraps → map_err (registries) / expect with message (local).

- MCP removed: Deleted all MCP code, rmcp dependency, mcp feature.

- Docs: Updated README, DESIGN, AGENTS to reflect all changes.
This commit is contained in:
2026-03-14 00:03:42 -03:00
parent 560ba6e20c
commit 17be6abaab
51 changed files with 876 additions and 1309 deletions

View File

@@ -106,6 +106,8 @@ impl ItemService {
/// Retrieves an item with its content, metadata, and tags.
///
/// Loads the item, its metadata/tags, and decompresses the full content.
/// This method is intended for CLI use only and has a size guard (100MB).
/// For larger items or server use, use `get_item_content_info_streaming`.
///
/// # Arguments
///
@@ -120,6 +122,7 @@ impl ItemService {
///
/// * `CoreError::ItemNotFound(id)` - If the item does not exist.
/// * `CoreError::Io(...)` - If file read or decompression fails.
/// * `CoreError::InvalidInput(...)` - If item exceeds 100MB size limit.
///
/// # Examples
///
@@ -132,6 +135,9 @@ impl ItemService {
conn: &Connection,
id: i64,
) -> Result<ItemWithContent, CoreError> {
// Size limit for loading entire content into memory (100MB)
const MAX_CONTENT_SIZE: i64 = 100 * 1024 * 1024;
debug!("ITEM_SERVICE: Getting item content for id: {id}");
let item_with_meta = self.get_item(conn, id)?;
let item_id = item_with_meta
@@ -145,6 +151,16 @@ impl ItemService {
)));
}
// Check size guard before loading content
if let Some(size) = item_with_meta.item.size
&& size > MAX_CONTENT_SIZE
{
return Err(CoreError::InvalidInput(format!(
"Item {} exceeds size limit ({} > {}). Use streaming API for large items.",
item_id, size, MAX_CONTENT_SIZE
)));
}
let mut item_path = self.data_path.clone();
item_path.push(item_id.to_string());
debug!("ITEM_SERVICE: Reading content from path: {item_path:?}");
@@ -164,47 +180,6 @@ impl ItemService {
})
}
/// Retrieves item content with binary detection and optional filtering.
///
/// Delegates to the streaming variant so that every filter option is
/// handled by a single code path, then buffers the entire (possibly
/// filtered) content in memory before returning it.
///
/// # Arguments
///
/// * `conn` - Database connection.
/// * `id` - Item ID.
/// * `filter` - Optional filter string to apply to content.
///
/// # Returns
///
/// * `Result<(Vec<u8>, String, bool), CoreError>` - (content, MIME type, is_binary).
///
/// # Errors
///
/// * `CoreError::ItemNotFound(id)` - If item not found.
/// * Filter or compression errors.
///
/// # Examples
///
/// ```ignore
/// let (content, mime, is_binary) = item_service.get_item_content_info(&conn, 1, Some("head_lines(10)"))?;
/// ```
pub fn get_item_content_info(
    &self,
    conn: &Connection,
    id: i64,
    filter: Option<String>,
) -> Result<(Vec<u8>, String, bool), CoreError> {
    // Reuse the streaming implementation for consistent filter handling,
    // then drain the reader into an owned buffer for the caller.
    let (mut stream, mime_type, is_binary) =
        self.get_item_content_info_streaming(conn, id, filter)?;
    let mut buf = Vec::new();
    stream.read_to_end(&mut buf)?;
    Ok((buf, mime_type, is_binary))
}
/// Determines if item content is binary based on metadata or sampling.
///
/// Checks existing "text" metadata first; if absent, samples the first 8192 bytes.
@@ -717,110 +692,6 @@ impl ItemService {
Ok(item)
}
/// Saves pre-loaded content as a new item, typically from MCP (Model Context Protocol) sources.
///
/// Bypasses the streaming read path: the content is already fully buffered,
/// so it is written directly to disk through the compression engine, and
/// tags, metadata, and meta plugins are applied around that write.
///
/// # Arguments
///
/// * `content` - Byte slice of content to save.
/// * `tags` - Tags to associate.
/// * `metadata` - Initial metadata key-value pairs.
/// * `cmd` - Mutable command.
/// * `settings` - Settings.
/// * `conn` - Mutable database connection.
///
/// # Returns
///
/// * `Result<ItemWithMeta, CoreError>` - The saved item with full details.
///
/// # Errors
///
/// * `CoreError::Database(...)` - If DB insert fails.
/// * `CoreError::Io(...)` - If file write fails.
///
/// # Examples
///
/// ```ignore
/// let content = b"Hello, world!";
/// let tags = vec!["mcp".to_string()];
/// let meta = HashMap::from([("source".to_string(), "api".to_string())]);
/// let item = service.save_item_from_mcp(content, &tags, &meta, &mut cmd, &settings, &mut conn)?;
/// ```
pub fn save_item_from_mcp(
    &self,
    content: &[u8],
    // `&[String]` instead of `&Vec<String>`: callers passing `&vec` still
    // compile via deref coercion, and slices are the idiomatic borrow.
    tags: &[String],
    metadata: &HashMap<String, String>,
    cmd: &mut Command,
    settings: &Settings,
    conn: &mut Connection,
) -> Result<ItemWithMeta, CoreError> {
    debug!(
        "ITEM_SERVICE: Starting save_item_from_mcp with {} bytes, {} tags, {} metadata entries",
        content.len(),
        tags.len(),
        metadata.len()
    );
    let compression_type = CompressionType::LZ4;
    let compression_engine = get_compression_engine(compression_type.clone())?;
    let item_id;
    let mut item;
    {
        // Create the DB row (and its tags/metadata) before touching disk so
        // the on-disk filename can be derived from the assigned item id.
        item = db::create_item(conn, compression_type.clone())?;
        item_id = item
            .id
            .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
        debug!("ITEM_SERVICE: Created MCP item with id: {item_id}");
        // Add tags
        for tag in tags {
            db::add_tag(conn, item_id, tag)?;
        }
        debug!("ITEM_SERVICE: Added {} tags to MCP item", tags.len());
        // Add custom metadata
        for (key, value) in metadata {
            db::add_meta(conn, item_id, key, value)?;
        }
        debug!(
            "ITEM_SERVICE: Added {} custom metadata entries to MCP item",
            metadata.len()
        );
    }
    let mut item_path = self.data_path.clone();
    item_path.push(item_id.to_string());
    debug!("ITEM_SERVICE: Writing MCP item to path: {item_path:?}");
    let mut writer = compression_engine.create(item_path.clone())?;
    writer.write_all(content)?;
    // Drop closes the compressed writer; NOTE(review): presumably the
    // engine finalizes the stream on drop — confirm against its impl.
    drop(writer);
    let mut plugins = self.meta_service.get_plugins(cmd, settings);
    debug!(
        "ITEM_SERVICE: Got {} configured meta plugins for MCP item",
        plugins.len()
    );
    // Plugins see the whole payload as a single chunk since it is
    // already buffered in memory.
    self.meta_service
        .initialize_plugins(&mut plugins, conn, item_id);
    self.meta_service
        .process_chunk(&mut plugins, content, conn, item_id);
    self.meta_service
        .finalize_plugins(&mut plugins, conn, item_id);
    debug!("ITEM_SERVICE: Processed MCP item through configured meta plugins");
    // Record the uncompressed size last, after the write succeeded.
    item.size = Some(content.len() as i64);
    db::update_item(conn, item.clone())?;
    debug!("ITEM_SERVICE: MCP item saved successfully");
    // Re-read so the returned item reflects plugin-added metadata.
    self.get_item(conn, item_id)
}
/// Returns a reference to the internal compression service.
///
/// # Returns