use anyhow::Result; use magic::{Cookie, CookieFlags}; use rusqlite::Connection; use std::io; use crate::common::PIPESIZE; use crate::meta_plugin::MetaPlugin; #[derive(Debug)] pub struct MagicFileMetaPlugin { buffer: Vec, max_buffer_size: usize, is_saved: bool, item_id: Option, cookie: Option, base: crate::meta_plugin::BaseMetaPlugin, } impl MagicFileMetaPlugin { pub fn new( options: Option>, outputs: Option>, ) -> MagicFileMetaPlugin { // Start with default options let mut final_options = std::collections::HashMap::new(); final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); if let Some(opts) = options { for (key, value) in opts { final_options.insert(key, value); } } // Start with default outputs let mut final_outputs = std::collections::HashMap::new(); let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]; for output_name in default_outputs { final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); } if let Some(outs) = outputs { for (key, value) in outs { final_outputs.insert(key, value); } } let max_buffer_size = final_options.get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(PIPESIZE as u64) as usize; let mut base = crate::meta_plugin::BaseMetaPlugin::new(); base.outputs = final_outputs; base.options = final_options; MagicFileMetaPlugin { buffer: Vec::new(), max_buffer_size, is_saved: false, item_id: None, cookie: None, base, } } pub fn new_simple() -> MagicFileMetaPlugin { Self::new(None, None) } fn get_magic_result(&self, flags: CookieFlags) -> io::Result { // Use the existing cookie and just change flags if let Some(cookie) = &self.cookie { cookie.set_flags(flags) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; let result = cookie.buffer(&self.buffer) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; // Clean up the result - remove extra whitespace and take first part if needed let trimmed = result.trim(); // For some magic results, we might want just the first part before semicolon or comma let cleaned = if trimmed.contains(';') { trimmed.split(';').next().unwrap_or(trimmed).trim() } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { trimmed.split(',').next().unwrap_or(trimmed).trim() } else { trimmed }; Ok(cleaned.to_string()) } else { Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) } } /// Helper function to process all magic types and collect metadata fn process_magic_types(&self, item_id: i64) -> Vec { let mut metadata = Vec::new(); // Define the types to process with their corresponding flags let types_to_process = [ ("mime_type", CookieFlags::MIME_TYPE), ("mime_encoding", CookieFlags::MIME_ENCODING), ("file_type", CookieFlags::default()), ]; for (name, flags) in types_to_process { if let Ok(result) = self.get_magic_result(flags) { if !result.is_empty() { // Use process_metadata_outputs to handle output mapping if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( name, result, self.base.outputs() ) { metadata.push(meta_data); } } } } metadata } } impl MetaPlugin for MagicFileMetaPlugin { fn initialize(&mut self, item_id: i64) -> crate::meta_plugin::MetaPluginResponse { self.item_id = Some(item_id); // Initialize the magic cookie once let cookie = match Cookie::open(Default::default()) { Ok(cookie) => cookie, Err(e) => { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } }; if let Err(e) = cookie.load(&[] as &[&str]) { return crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: true, }; } self.cookie = Some(cookie); crate::meta_plugin::MetaPluginResponse { metadata: Vec::new(), is_finalized: false, } } fn finalize(&mut self) -> crate::meta_plugin::MetaPluginResponse { let metadata = if let Some(item_id) = self.item_id { self.process_magic_types(item_id) } else { Vec::new() }; crate::meta_plugin::MetaPluginResponse { metadata, is_finalized: true, } } fn update(&mut self, data: &[u8]) -> crate::meta_plugin::MetaPluginResponse { let mut metadata = Vec::new(); // Only collect up to max_buffer_size let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); if remaining_capacity > 0 { let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); self.buffer.extend_from_slice(&data[..bytes_to_copy]); // Check if we've reached our buffer limit and return metadata if self.buffer.len() >= self.max_buffer_size { if let Some(item_id) = self.item_id { metadata = self.process_magic_types(item_id); } } } crate::meta_plugin::MetaPluginResponse { metadata, is_finalized: !metadata.is_empty(), } } fn meta_name(&self) -> String { "magic_file".to_string() } fn configure_options(&mut self, options: &std::collections::HashMap) -> Result<()> { if let Some(max_buffer_size) = options.get("max_buffer_size") { if let Some(size) = max_buffer_size.as_u64() { self.max_buffer_size = size as usize; } } Ok(()) } fn outputs(&self) -> &std::collections::HashMap { self.base.outputs() } fn outputs_mut(&mut self) -> &mut std::collections::HashMap { self.base.outputs_mut() } fn default_outputs(&self) -> Vec { vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] } fn default_options(&self) -> std::collections::HashMap { let mut options = std::collections::HashMap::new(); options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); options } fn options(&self) -> &std::collections::HashMap { self.base.options() } fn options_mut(&mut self) -> &mut std::collections::HashMap { self.base.options_mut() } }