use magic::{Cookie, CookieFlags}; use std::io; use crate::common::PIPESIZE; use crate::meta_plugin::{MetaPlugin, MetaPluginType}; #[derive(Debug)] pub struct MagicFileMetaPlugin { buffer: Vec, max_buffer_size: usize, is_finalized: bool, cookie: Option, base: crate::meta_plugin::BaseMetaPlugin, } impl MagicFileMetaPlugin { pub fn new( options: Option>, outputs: Option>, ) -> MagicFileMetaPlugin { // Start with default options let mut final_options = std::collections::HashMap::new(); final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); if let Some(opts) = options { for (key, value) in opts { final_options.insert(key, value); } } // Start with default outputs let mut final_outputs = std::collections::HashMap::new(); let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]; for output_name in default_outputs { final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); } if let Some(outs) = outputs { for (key, value) in outs { final_outputs.insert(key, value); } } let max_buffer_size = final_options.get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(PIPESIZE as u64) as usize; // Ensure the default max_buffer_size is in the options if !final_options.contains_key("max_buffer_size") { final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); } let mut base = crate::meta_plugin::BaseMetaPlugin::new(); base.outputs = final_outputs; base.options = final_options; MagicFileMetaPlugin { buffer: Vec::new(), max_buffer_size, is_finalized: false, cookie: None, base, } } fn get_magic_result(&self, flags: CookieFlags) -> io::Result { // Use the existing cookie and just change flags if let Some(cookie) = &self.cookie { cookie.set_flags(flags) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; let result = cookie.buffer(&self.buffer) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; // Clean up the result - remove extra whitespace and take first part if needed let trimmed = result.trim(); // For some magic results, we might want just the first part before semicolon or comma let cleaned = if trimmed.contains(';') { trimmed.split(';').next().unwrap_or(trimmed).trim() } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { trimmed.split(',').next().unwrap_or(trimmed).trim() } else { trimmed }; Ok(cleaned.to_string()) } else { Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) } } /// Helper function to process all magic types and collect metadata fn process_magic_types(&self) -> Vec { let mut metadata = Vec::new(); // Define the types to process with their corresponding flags let types_to_process = [ ("mime_type", CookieFlags::MIME_TYPE), ("mime_encoding", CookieFlags::MIME_ENCODING), ("file_type", CookieFlags::default()), ]; for (name, flags) in types_to_process.iter() { if let Ok(result) = self.get_magic_result(*flags) { if !result.is_empty() { // Use process_metadata_outputs to handle output mapping if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( name, serde_yaml::Value::String(result), self.base.outputs() ) { metadata.push(meta_data); } } } } metadata } } impl MetaPlugin for MagicFileMetaPlugin { /// Checks if the plugin has been finalized. /// /// # Returns /// /// `true` if finalized, `false` otherwise. fn is_finalized(&self) -> bool { self.is_finalized } /// Sets the finalized state of the plugin. /// /// # Arguments /// /// * `finalized` - The new finalized state. fn set_finalized(&mut self, finalized: bool) { self.is_finalized = finalized; } /// Initializes the magic cookie for file type detection. /// /// Loads the magic database; finalizes if initialization fails. /// /// # Returns /// /// A `MetaPluginResponse` with empty metadata; `is_finalized` is `true` on failure. /// /// # Errors /// /// Logs errors; returns finalized response on cookie or load failure. /// /// # Examples /// /// ``` /// let mut plugin = MagicFileMetaPlugin::new(None, None); /// let response = plugin.initialize(); /// ``` fn initialize(&mut self) -> crate::meta_plugin::MetaPluginResponse { // Initialize the magic cookie once let cookie = match Cookie::open(Default::default()) { Ok(cookie) => cookie, Err(_e) => { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } }; if let Err(_e) = cookie.load(&[] as &[&str]) { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } self.cookie = Some(cookie); crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } /// Finalizes the plugin and performs file type detection. /// /// Analyzes the accumulated buffer and outputs detected types. /// /// # Returns /// /// A `MetaPluginResponse` with detection metadata and finalized state set to `true`. /// /// # Examples /// /// ``` /// let mut plugin = MagicFileMetaPlugin::new(None, None); /// // ... after updates /// let response = plugin.finalize(); /// assert!(response.is_finalized); /// ``` fn finalize(&mut self) -> crate::meta_plugin::MetaPluginResponse { // If already finalized, don't process again if self.is_finalized { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let metadata = self.process_magic_types(); // Mark as finalized self.is_finalized = true; crate::meta_plugin::MetaPluginResponse { metadata, is_finalized: true, } } /// Updates the plugin with new data, accumulating for analysis. /// /// Buffers data up to `max_buffer_size`; triggers detection when full. /// /// # Arguments /// /// * `data` - Content chunk to buffer. /// /// # Returns /// /// A `MetaPluginResponse` with metadata on buffer full; finalizes then. /// /// # Examples /// /// ``` /// let mut plugin = MagicFileMetaPlugin::new(None, None); /// let response = plugin.update(b"content"); /// ``` fn update(&mut self, data: &[u8]) -> crate::meta_plugin::MetaPluginResponse { // If already finalized, don't process more data if self.is_finalized { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } let mut metadata = Vec::new(); // Only collect up to max_buffer_size let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); if remaining_capacity > 0 { let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); self.buffer.extend_from_slice(&data[..bytes_to_copy]); // Check if we've reached our buffer limit and return metadata if self.buffer.len() >= self.max_buffer_size { metadata = self.process_magic_types(); // Mark as finalized when we've processed enough data self.is_finalized = true; } } let is_finalized = !metadata.is_empty(); crate::meta_plugin::MetaPluginResponse { metadata, is_finalized, } } /// Returns the type of this meta plugin. /// /// # Returns /// /// `MetaPluginType::MagicFile`. fn meta_type(&self) -> MetaPluginType { MetaPluginType::MagicFile } /// Returns a reference to the outputs mapping. /// /// # Returns /// /// A reference to the `HashMap` of outputs. fn outputs(&self) -> &std::collections::HashMap { self.base.outputs() } /// Returns a mutable reference to the outputs mapping. /// /// # Returns /// /// A mutable reference to the `HashMap` of outputs. fn outputs_mut(&mut self) -> &mut std::collections::HashMap { self.base.outputs_mut() } /// Returns the default output names for this plugin. /// /// # Returns /// /// Vector of default output field names. fn default_outputs(&self) -> Vec { vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] } /// Returns a reference to the options mapping. /// /// # Returns /// /// A reference to the `HashMap` of options. fn options(&self) -> &std::collections::HashMap { self.base.options() } /// Returns a mutable reference to the options mapping. /// /// # Returns /// /// A mutable reference to the `HashMap` of options. fn options_mut(&mut self) -> &mut std::collections::HashMap { self.base.options_mut() } } use crate::meta_plugin::register_meta_plugin; // Register the plugin at module initialization time #[ctor::ctor] fn register_magic_file_plugin() { register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { Box::new(MagicFileMetaPlugin::new(options, outputs)) }); }