diff --git a/Cargo.toml b/Cargo.toml
index 42c7630..018660b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,7 +16,7 @@ derive_more = { version = "2.0", features = ["full"] }
 smart-default = "0.7"
 thiserror = "1.0"
 base64 = "0.22.1"
-chrono = "0.4.26"
+chrono = { version = "0.4.26", features = ["serde"] }
 clap = { version = "4.3.10", features = ["derive", "env"] }
 config = "0.14.0"
 ctor = "0.2"
diff --git a/src/lib.rs b/src/lib.rs
index 25e6443..386ec3f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,7 @@
+#![deny(clippy::all)]
+#![deny(unsafe_code)]
+#![allow(unused_imports)]
+
 // Re-export modules for testing
 pub mod common;
 pub mod compression_engine;
@@ -10,7 +14,6 @@ pub mod modes;
 pub mod plugins;
 pub mod args;
 pub mod parser;
-pub mod utils;
 
 // Re-export Args struct for library usage
 pub use args::Args;
@@ -26,7 +29,7 @@ use filter_plugin::{
 // Import all meta plugins to ensure they register themselves
 #[allow(unused_imports)]
 use meta_plugin::{
-    magic, cwd, text, user, shell, shell_pid, keep_pid, digest,
+    magic_file, cwd, text, user, shell, shell_pid, keep_pid, digest,
     read_time, read_rate, hostname, exec, env
 };
 
diff --git a/src/meta_plugin/magic_file.rs b/src/meta_plugin/magic_file.rs
new file mode 100644
index 0000000..ffd0aeb
--- /dev/null
+++ b/src/meta_plugin/magic_file.rs
@@ -0,0 +1,248 @@
+//! Meta plugin that identifies buffered input with libmagic.
+
+use std::collections::HashMap;
+use std::io;
+
+use magic::{Cookie, CookieFlags};
+use serde_yaml::Value;
+
+use crate::common::PIPESIZE;
+use crate::meta_plugin::{
+    process_metadata_outputs, register_meta_plugin, BaseMetaPlugin, MetaPlugin,
+    MetaPluginResponse, MetaPluginType,
+};
+
+/// Option key that caps how many input bytes are buffered for detection.
+const MAX_BUFFER_SIZE_KEY: &str = "max_buffer_size";
+
+/// Names of the outputs this plugin produces by default.
+const DEFAULT_OUTPUTS: [&str; 3] = ["mime_type", "mime_encoding", "file_type"];
+
+/// Buffers the head of the input and asks libmagic for its MIME type, MIME
+/// encoding and file-type description.
+#[derive(Debug)]
+pub struct MagicFileMetaPlugin {
+    /// Leading input bytes, never more than `max_buffer_size`.
+    buffer: Vec<u8>,
+    /// Number of bytes to collect before detection runs.
+    max_buffer_size: usize,
+    /// True once detection has run, initialization failed, or a caller set it.
+    is_finalized: bool,
+    /// libmagic handle; `None` until `initialize` succeeds.
+    cookie: Option<Cookie>,
+    /// Holds the effective options and output mapping.
+    base: BaseMetaPlugin,
+}
+
+impl MagicFileMetaPlugin {
+    /// Creates a plugin from optional overrides.
+    ///
+    /// Entries in `options` are layered over the default `max_buffer_size`
+    /// (`PIPESIZE`). Entries in `outputs` are layered over a default mapping in
+    /// which each output is keyed and valued by its own name.
+    pub fn new(
+        options: Option<HashMap<String, Value>>,
+        outputs: Option<HashMap<String, Value>>,
+    ) -> MagicFileMetaPlugin {
+        let mut final_options = HashMap::new();
+        final_options.insert(
+            MAX_BUFFER_SIZE_KEY.to_string(),
+            Value::Number(PIPESIZE.into()),
+        );
+        final_options.extend(options.unwrap_or_default());
+
+        let mut final_outputs: HashMap<String, Value> = DEFAULT_OUTPUTS
+            .iter()
+            .map(|&name| (name.to_string(), Value::String(name.to_string())))
+            .collect();
+        final_outputs.extend(outputs.unwrap_or_default());
+
+        // A caller-supplied value that is not an unsigned integer is ignored
+        // and the default size is used instead.
+        let max_buffer_size = final_options
+            .get(MAX_BUFFER_SIZE_KEY)
+            .and_then(Value::as_u64)
+            .unwrap_or(PIPESIZE as u64) as usize;
+
+        let mut base = BaseMetaPlugin::new();
+        base.outputs = final_outputs;
+        base.options = final_options;
+
+        MagicFileMetaPlugin {
+            buffer: Vec::new(),
+            max_buffer_size,
+            is_finalized: false,
+            cookie: None,
+            base,
+        }
+    }
+
+    /// Builds a response that carries no metadata.
+    fn empty_response(is_finalized: bool) -> MetaPluginResponse {
+        MetaPluginResponse {
+            metadata: Vec::new(),
+            is_finalized,
+        }
+    }
+
+    /// Runs libmagic over the buffered bytes with `flags` and tidies the text.
+    ///
+    /// A result containing `;` is cut at the first `;`. Otherwise, for MIME
+    /// queries only, a result containing `,` is cut at the first `,`; plain
+    /// file-type descriptions keep their commas.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the cookie is not initialized, when the flags cannot be set,
+    /// or when the buffer cannot be analyzed.
+    fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
+        let cookie = self
+            .cookie
+            .as_ref()
+            .ok_or_else(|| io::Error::other("Magic cookie not initialized"))?;
+
+        cookie
+            .set_flags(flags)
+            .map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
+        let result = cookie
+            .buffer(&self.buffer)
+            .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
+
+        let trimmed = result.trim();
+        // `intersects`, not `contains`: callers pass one MIME flag at a time,
+        // so requiring both flags would make the comma rule unreachable.
+        let is_mime_query = flags.intersects(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING);
+        let cleaned = if trimmed.contains(';') {
+            trimmed.split(';').next().unwrap_or(trimmed).trim()
+        } else if is_mime_query && trimmed.contains(',') {
+            trimmed.split(',').next().unwrap_or(trimmed).trim()
+        } else {
+            trimmed
+        };
+
+        Ok(cleaned.to_string())
+    }
+
+    /// Runs every detection pass over the buffered bytes.
+    ///
+    /// Detection is best effort: a pass that fails or returns an empty string
+    /// is skipped. The response is always marked finalized, because `update`
+    /// and `finalize` both retire the plugin right before calling this.
+    fn process_magic_types(&self) -> MetaPluginResponse {
+        let types_to_process = [
+            ("mime_type", CookieFlags::MIME_TYPE),
+            ("mime_encoding", CookieFlags::MIME_ENCODING),
+            ("file_type", CookieFlags::default()),
+        ];
+
+        let metadata = types_to_process
+            .iter()
+            .filter_map(|&(name, flags)| {
+                let result = self.get_magic_result(flags).ok()?;
+                if result.is_empty() {
+                    return None;
+                }
+                // `process_metadata_outputs` handles the output mapping.
+                process_metadata_outputs(name, Value::String(result), self.base.outputs())
+            })
+            .collect();
+
+        MetaPluginResponse {
+            metadata,
+            is_finalized: true,
+        }
+    }
+}
+
+impl MetaPlugin for MagicFileMetaPlugin {
+    fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+
+    fn set_finalized(&mut self, finalized: bool) {
+        self.is_finalized = finalized;
+    }
+
+    /// Opens a libmagic cookie and loads its database from an empty path list.
+    ///
+    /// If either step fails the plugin retires itself: it records and reports
+    /// the finalized state so no input is buffered for a detection that cannot
+    /// run.
+    fn initialize(&mut self) -> MetaPluginResponse {
+        let cookie = Cookie::open(Default::default())
+            .ok()
+            .filter(|cookie| cookie.load(&[] as &[&str]).is_ok());
+
+        let failed = cookie.is_none();
+        if failed {
+            self.is_finalized = true;
+        }
+        self.cookie = cookie;
+        Self::empty_response(failed)
+    }
+
+    /// Runs detection on whatever has been buffered, unless already finalized.
+    fn finalize(&mut self) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        self.is_finalized = true;
+        self.process_magic_types()
+    }
+
+    /// Buffers `data` up to `max_buffer_size` and runs detection once full.
+    ///
+    /// The reported `is_finalized` mirrors the plugin's own flag, so a full
+    /// buffer that yields no metadata is still reported as finalized.
+    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
+        let bytes_to_copy = data.len().min(remaining_capacity);
+        self.buffer.extend_from_slice(&data[..bytes_to_copy]);
+
+        // `remaining_capacity > 0` keeps a zero-sized buffer from triggering
+        // detection here; that case is left to `finalize`.
+        if remaining_capacity > 0 && self.buffer.len() >= self.max_buffer_size {
+            self.is_finalized = true;
+            return self.process_magic_types();
+        }
+
+        Self::empty_response(false)
+    }
+
+    fn meta_type(&self) -> MetaPluginType {
+        MetaPluginType::MagicFile
+    }
+
+    fn outputs(&self) -> &HashMap<String, Value> {
+        self.base.outputs()
+    }
+
+    fn outputs_mut(&mut self) -> &mut HashMap<String, Value> {
+        self.base.outputs_mut()
+    }
+
+    fn default_outputs(&self) -> Vec<String> {
+        DEFAULT_OUTPUTS.iter().map(|&name| name.to_string()).collect()
+    }
+
+    fn options(&self) -> &HashMap<String, Value> {
+        self.base.options()
+    }
+
+    fn options_mut(&mut self) -> &mut HashMap<String, Value> {
+        self.base.options_mut()
+    }
+}
+
+/// Registers the plugin factory at module initialization time.
+#[ctor::ctor]
+fn register_magic_file_plugin() {
+    register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
+        Box::new(MagicFileMetaPlugin::new(options, outputs))
+    });
+}
diff --git a/src/meta_plugin/mod.rs b/src/meta_plugin/mod.rs
index 54d9f60..2e765cc 100644
--- a/src/meta_plugin/mod.rs
+++ b/src/meta_plugin/mod.rs
@@ -6,7 +6,7 @@ use once_cell::sync::Lazy;
 
 pub mod exec;
 pub mod digest;
-pub mod magic;
+pub mod magic_file;
 pub mod text;
 pub mod read_time;
 pub mod read_rate;
@@ -20,7 +20,7 @@ pub mod env;
 
 pub use exec::MetaPluginExec;
 pub use digest::DigestMetaPlugin;
-pub use magic::MagicFileMetaPlugin;
+pub use magic_file::MagicFileMetaPlugin;
 pub use text::TextMetaPlugin;
 pub use read_time::ReadTimeMetaPlugin;
 pub use read_rate::ReadRateMetaPlugin;