use anyhow::Result; use rusqlite::Connection; use crate::common::is_binary::is_binary; use crate::meta_plugin::MetaPlugin; #[derive(Debug, Clone, Default)] pub struct BinaryMetaPlugin { meta_name: String, buffer: Vec, max_buffer_size: usize, is_saved: bool, item_id: Option, outputs: std::collections::HashMap, } impl BinaryMetaPlugin { pub fn new( options: Option>, outputs: Option>, ) -> BinaryMetaPlugin { // Start with default options let mut final_options = Self::default_options(); if let Some(opts) = options { for (key, value) in opts { final_options.insert(key, value); } } // Start with default outputs let mut final_outputs = std::collections::HashMap::new(); let default_outputs = Self::default_outputs(); for output_name in default_outputs { final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); } if let Some(outs) = outputs { for (key, value) in outs { final_outputs.insert(key, value); } } let max_buffer_size = final_options.get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(4096) as usize; BinaryMetaPlugin { meta_name: "binary".to_string(), buffer: Vec::new(), max_buffer_size, is_saved: false, item_id: None, outputs: final_outputs, } } pub fn new_simple() -> BinaryMetaPlugin { Self::new(None, None) } fn save_metadata(&mut self, conn: &Connection) -> Result<()> { if !self.is_saved { if let Some(item_id) = self.item_id { let is_binary_result = is_binary(&self.buffer); let value = if is_binary_result { "true".to_string() } else { "false".to_string() }; // Save to database immediately using central output handler let _ = self.save_meta(conn, item_id, "binary", value); self.is_saved = true; } } Ok(()) } } impl MetaPlugin for BinaryMetaPlugin { fn is_internal(&self) -> bool { true } fn finalize(&mut self, conn: &Connection) -> Result<()> { // Save the binary detection result when finalizing, if not already saved self.save_metadata(conn) } fn update(&mut self, data: &[u8], conn: &Connection) { // If we've already saved the metadata, no need to collect more data if self.is_saved { return; } // Calculate how much data we can still accept let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); if remaining_capacity > 0 { // Determine how much data to copy let bytes_to_take = std::cmp::min(data.len(), remaining_capacity); // Add data to our buffer self.buffer.extend_from_slice(&data[..bytes_to_take]); } // If we've reached our buffer limit, save the metadata immediately if self.buffer.len() >= self.max_buffer_size { let _ = self.save_metadata(conn); } } fn meta_name(&mut self) -> String { self.meta_name.clone() } fn initialize(&mut self, _conn: &Connection, item_id: i64) -> Result<()> { self.item_id = Some(item_id); Ok(()) } fn configure_options(&mut self, options: &std::collections::HashMap) -> Result<()> { if let Some(max_buffer_size) = options.get("max_buffer_size") { if let Some(size) = max_buffer_size.as_u64() { self.max_buffer_size = size as usize; } } Ok(()) } fn outputs(&self) -> &std::collections::HashMap { &self.outputs } fn outputs_mut(&mut self) -> &mut std::collections::HashMap { &mut self.outputs } fn default_outputs(&self) -> Vec { vec!["binary".to_string()] } fn default_options(&self) -> std::collections::HashMap { let mut options = std::collections::HashMap::new(); options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(4096.into())); options } }