diff --git a/src/meta_plugin/magic_file.rs b/src/meta_plugin/magic_file.rs index e632628..80e41d9 100644 --- a/src/meta_plugin/magic_file.rs +++ b/src/meta_plugin/magic_file.rs @@ -1,15 +1,16 @@ #[cfg(feature = "magic")] use magic::{Cookie, CookieFlags}; #[cfg(not(feature = "magic"))] -use std::process::{Command, Stdio}; -use std::io::{self, Write}; +use std::process::{Command, Stdio, Output}; #[cfg(not(feature = "magic"))] use which::which; +use std::io::{self, Write}; use log::debug; +use serde_yaml; use crate::common::PIPESIZE; -use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData}; +use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs}; #[cfg(feature = "magic")] #[derive(Debug)] @@ -70,28 +71,416 @@ impl MagicFileMetaPlugin { } } - fn get_magic_result(&self, flags: CookieFlags) -> io::Result { - // Use the existing cookie and just change flags - if let Some(cookie) = &self.cookie { - cookie.set_flags(flags) - .map_err(|e| io::Error::other(format!("Failed to set magic flags: {}", e)))?; + if self.buffer.is_empty() { + return Ok("empty".to_string()); + } - let result = cookie.buffer(&self.buffer) - .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {}", e)))?; - - // Clean up the result - remove extra whitespace and take first part if needed - let trimmed = result.trim(); - - // For some magic results, we might want just the first part before semicolon or comma - let cleaned = if trimmed.contains(';') { - trimmed.split(';').next().unwrap_or(trimmed).trim() - } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { - trimmed.split(',').next().unwrap_or(trimmed).trim() - } else { - trimmed - }; - - Ok(cleaned.to_string()) + let cookie = if let Some(c) = &self.cookie { + c.set_flags(flags) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; + c } else { - Err(io::Error::other("Magic cookie not \ No newline at end of file + return Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")); + }; + + let result = cookie.buffer(&self.buffer) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; + + // Clean up the result - remove extra whitespace and take first part if needed + let trimmed = result.trim(); + + // For some magic results, we might want just the first part before semicolon or comma + let cleaned = if trimmed.contains(';') { + trimmed.split(';').next().unwrap_or(trimmed).trim().to_string() + } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { + trimmed.split(',').next().unwrap_or(trimmed).trim().to_string() + } else { + trimmed.to_string() + }; + + Ok(cleaned) + } +} + +#[cfg(feature = "magic")] +impl MetaPlugin for MagicFileMetaPlugin { + fn meta_type(&self) -> MetaPluginType { + MetaPluginType::MagicFile + } + + fn is_supported(&self) -> bool { + true + } + + fn is_internal(&self) -> bool { + true + } + + fn is_finalized(&self) -> bool { + self.is_finalized + } + + fn initialize(&mut self) -> MetaPluginResponse { + self.is_finalized = false; + MetaPluginResponse::default() + } + + fn update(&mut self, data: &[u8]) -> MetaPluginResponse { + if self.buffer.len() + data.len() > self.max_buffer_size { + // Truncate to max size, keeping the beginning + let additional_space = self.max_buffer_size.saturating_sub(self.buffer.len()); + if additional_space > 0 { + self.buffer.extend_from_slice(&data[..additional_space.min(data.len())]); + } + } else { + self.buffer.extend_from_slice(data); + } + MetaPluginResponse::default() + } + + fn finalize(&mut self) -> MetaPluginResponse { + let mut metadata = Vec::new(); + let mut response = MetaPluginResponse { + metadata, + is_finalized: true, + }; + + if self.buffer.is_empty() { + self.is_finalized = true; + return response; + } + + // Initialize cookie if not already done + if self.cookie.is_none() { + match Cookie::open(CookieFlags::default()) { + Ok(cookie) => { + self.cookie = Some(cookie); + } + Err(e) => { + debug!("META: Failed to initialize magic cookie: {}", e); + self.is_finalized = true; + return response; + } + } + } + + // Process mime_type + if let Some(_) = self.base.outputs.get("mime_type") { + match self.get_magic_result(CookieFlags::MIME_TYPE) { + Ok(mime_type) => { + if let Some(meta_data) = process_metadata_outputs( + "mime_type", + serde_yaml::Value::String(mime_type), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + Err(e) => debug!("META: Failed to get MIME type: {}", e), + } + } + + // Process mime_encoding + if let Some(_) = self.base.outputs.get("mime_encoding") { + match self.get_magic_result(CookieFlags::MIME_ENCODING) { + Ok(mime_encoding) => { + if let Some(meta_data) = process_metadata_outputs( + "mime_encoding", + serde_yaml::Value::String(mime_encoding), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + Err(e) => debug!("META: Failed to get MIME encoding: {}", e), + } + } + + // Process file_type (description) + if let Some(_) = self.base.outputs.get("file_type") { + match self.get_magic_result(CookieFlags::empty()) { + Ok(file_type) => { + if let Some(meta_data) = process_metadata_outputs( + "file_type", + serde_yaml::Value::String(file_type), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + Err(e) => debug!("META: Failed to get file type: {}", e), + } + } + + self.is_finalized = true; + response + } + + fn outputs(&self) -> &std::collections::HashMap { + &self.base.outputs + } + + fn outputs_mut(&mut self) -> &mut std::collections::HashMap { + &mut self.base.outputs + } + + fn options(&self) -> &std::collections::HashMap { + &self.base.options + } + + fn options_mut(&mut self) -> &mut std::collections::HashMap { + &mut self.base.options + } + + fn default_outputs(&self) -> Vec { + vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] + } +} + +#[cfg(not(feature = "magic"))] +#[derive(Debug)] +pub struct FallbackMagicFileMetaPlugin { + buffer: Vec, + max_buffer_size: usize, + supported: bool, + is_finalized: bool, + base: BaseMetaPlugin, +} + +#[cfg(not(feature = "magic"))] +impl FallbackMagicFileMetaPlugin { + pub fn new( + options: Option>, + outputs: Option>, + ) -> FallbackMagicFileMetaPlugin { + let supported = which("file").is_ok(); + + // Start with default options + let mut final_options = std::collections::HashMap::new(); + final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); + if let Some(opts) = options { + for (key, value) in opts { + final_options.insert(key, value); + } + } + + // Start with default outputs + let mut final_outputs = std::collections::HashMap::new(); + let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]; + for output_name in default_outputs { + final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); + } + if let Some(outs) = outputs { + for (key, value) in outs { + final_outputs.insert(key, value); + } + } + + let max_buffer_size = final_options.get("max_buffer_size") + .and_then(|v| v.as_u64()) + .unwrap_or(PIPESIZE as u64) as usize; + + let mut base = BaseMetaPlugin::new(); + base.outputs = final_outputs; + base.options = final_options; + + FallbackMagicFileMetaPlugin { + buffer: Vec::new(), + max_buffer_size, + supported, + is_finalized: false, + base, + } + } + + fn run_file_command(&self, args: &[&str]) -> io::Result { + if self.buffer.is_empty() { + return Ok("empty".to_string()); + } + + let mut cmd = Command::new("file"); + for arg in args { + cmd.arg(arg); + } + cmd.arg("-").stdin(Stdio::piped()).stdout(Stdio::piped()).stderr(Stdio::piped()); + + let mut child = cmd.spawn() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn file command: {}", e)))?; + + { + let stdin = child.stdin.as_mut().unwrap(); + stdin.write_all(&self.buffer) + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to write to file stdin: {}", e)))?; + } + + let output = child.wait_with_output() + .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait on file command: {}", e)))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(io::Error::new(io::ErrorKind::Other, format!("File command failed: {}", stderr))); + } + + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + Ok(stdout) + } + + fn get_mime_info(&self) -> io::Result<(String, String)> { + let mime_output = self.run_file_command(&["-b", "--mime"])?; + if mime_output == "empty" { + return Ok(("application/octet-stream".to_string(), "binary".to_string())); + } + + let parts: Vec<&str> = mime_output.split(';').collect(); + let mime_type = if parts.is_empty() { + mime_output + } else { + parts[0].trim() + }.to_string(); + + let mime_encoding = if parts.len() > 1 { + parts[1].replace(" charset=", "").trim().to_string() + } else { + "binary".to_string() + }; + + Ok((mime_type, mime_encoding)) + } +} + +#[cfg(not(feature = "magic"))] +impl MetaPlugin for FallbackMagicFileMetaPlugin { + fn meta_type(&self) -> MetaPluginType { + MetaPluginType::MagicFile + } + + fn is_supported(&self) -> bool { + self.supported + } + + fn is_internal(&self) -> bool { + true + } + + fn is_finalized(&self) -> bool { + self.is_finalized + } + + fn initialize(&mut self) -> MetaPluginResponse { + self.is_finalized = false; + MetaPluginResponse::default() + } + + fn update(&mut self, data: &[u8]) -> MetaPluginResponse { + if self.buffer.len() + data.len() > self.max_buffer_size { + // Truncate to max size, keeping the beginning + let additional_space = self.max_buffer_size.saturating_sub(self.buffer.len()); + if additional_space > 0 { + self.buffer.extend_from_slice(&data[..additional_space.min(data.len())]); + } + } else { + self.buffer.extend_from_slice(data); + } + MetaPluginResponse::default() + } + + fn finalize(&mut self) -> MetaPluginResponse { + let mut metadata = Vec::new(); + let mut response = MetaPluginResponse { + metadata, + is_finalized: true, + }; + + if !self.supported || self.buffer.is_empty() { + self.is_finalized = true; + return response; + } + + // Process mime_type and mime_encoding from single mime command + match self.get_mime_info() { + Ok((mime_type, mime_encoding)) => { + if let Some(_) = self.base.outputs.get("mime_type") { + if let Some(meta_data) = process_metadata_outputs( + "mime_type", + serde_yaml::Value::String(mime_type.clone()), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + + if let Some(_) = self.base.outputs.get("mime_encoding") { + if let Some(meta_data) = process_metadata_outputs( + "mime_encoding", + serde_yaml::Value::String(mime_encoding), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + } + Err(e) => debug!("META: Failed to get MIME info with file command: {}", e), + } + + // Process file_type (description) + if let Some(_) = self.base.outputs.get("file_type") { + match self.run_file_command(&["-b"]) { + Ok(file_type) => { + if let Some(meta_data) = process_metadata_outputs( + "file_type", + serde_yaml::Value::String(file_type), + &self.base.outputs, + ) { + response.metadata.push(meta_data); + } + } + Err(e) => debug!("META: Failed to get file type with file command: {}", e), + } + } + + self.is_finalized = true; + response + } + + fn outputs(&self) -> &std::collections::HashMap { + &self.base.outputs + } + + fn outputs_mut(&mut self) -> &mut std::collections::HashMap { + &mut self.base.outputs + } + + fn options(&self) -> &std::collections::HashMap { + &self.base.options + } + + fn options_mut(&mut self) -> &mut std::collections::HashMap { + &mut self.base.options + } + + fn default_outputs(&self) -> Vec { + vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] + } +} + +// Registration +#[cfg(feature = "magic")] +use crate::meta_plugin::{register_meta_plugin, MetaPluginType}; +#[cfg(feature = "magic")] +#[ctor::ctor] +fn register_magic_plugin() { + register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { + Box::new(MagicFileMetaPlugin::new(options, outputs)) + }); +} + +#[cfg(not(feature = "magic"))] +use crate::meta_plugin::{register_meta_plugin, MetaPluginType}; +#[cfg(not(feature = "magic"))] +#[ctor::ctor] +fn register_fallback_magic_plugin() { + register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { + Box::new(FallbackMagicFileMetaPlugin::new(options, outputs)) + }); +}