diff --git a/src/compression_engine/gzip.rs b/src/compression_engine/gzip.rs index 80fa973..70e6708 100644 --- a/src/compression_engine/gzip.rs +++ b/src/compression_engine/gzip.rs @@ -11,12 +11,12 @@ use std::io::{Read, Write}; #[cfg(feature = "gzip")] use std::path::PathBuf; -#[cfg(feature = "gzip")] -use flate2::Compression; #[cfg(feature = "gzip")] use flate2::read::GzDecoder; #[cfg(feature = "gzip")] use flate2::write::GzEncoder; +#[cfg(feature = "gzip")] +use flate2::Compression; #[cfg(feature = "gzip")] use crate::compression_engine::CompressionEngine; diff --git a/src/compression_engine/lz4.rs b/src/compression_engine/lz4.rs index e2b82c5..84fd98c 100644 --- a/src/compression_engine/lz4.rs +++ b/src/compression_engine/lz4.rs @@ -1,23 +1,34 @@ +#[cfg(feature = "lz4")] use anyhow::Result; +#[cfg(feature = "lz4")] use log::*; +#[cfg(feature = "lz4")] use std::io::Write; +#[cfg(feature = "lz4")] use lz4_flex::frame::{FrameDecoder, FrameEncoder}; +#[cfg(feature = "lz4")] use std::fs::File; +#[cfg(feature = "lz4")] use std::io::Read; +#[cfg(feature = "lz4")] use std::path::PathBuf; +#[cfg(feature = "lz4")] use crate::compression_engine::CompressionEngine; +#[cfg(feature = "lz4")] #[derive(Debug, Eq, PartialEq, Clone, Default)] pub struct CompressionEngineLZ4 {} +#[cfg(feature = "lz4")] impl CompressionEngineLZ4 { pub fn new() -> CompressionEngineLZ4 { CompressionEngineLZ4 {} } } +#[cfg(feature = "lz4")] impl CompressionEngine for CompressionEngineLZ4 { fn open(&self, file_path: PathBuf) -> Result> { debug!("COMPRESSION: Opening {:?} using {:?}", file_path, *self); diff --git a/src/compression_engine/mod.rs b/src/compression_engine/mod.rs index 43486be..a05647c 100644 --- a/src/compression_engine/mod.rs +++ b/src/compression_engine/mod.rs @@ -1,4 +1,4 @@ -use anyhow::{Result, anyhow}; +use anyhow::{anyhow, Result}; use std::io; use std::io::{Read, Write}; use std::path::PathBuf; @@ -176,7 +176,11 @@ impl Clone for Box { lazy_static! { static ref COMPRESSION_ENGINES: EnumMap> = { let mut em = enum_map! { - CompressionType::LZ4 => Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new()) as Box, + CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new( + "lz4", + vec!["-c"], + vec!["-d", "-c"] + )) as Box, CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new( "gzip", vec!["-c"], @@ -207,6 +211,13 @@ lazy_static! { as Box; } + #[cfg(feature = "lz4")] + { + em[CompressionType::LZ4] = + Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new()) + as Box; + } + em }; } diff --git a/src/compression_engine/program.rs b/src/compression_engine/program.rs index 506cd8e..bf99a65 100644 --- a/src/compression_engine/program.rs +++ b/src/compression_engine/program.rs @@ -1,4 +1,4 @@ -use anyhow::{Context, Result, anyhow}; +use anyhow::{anyhow, Context, Result}; use log::*; use std::fs::File; use std::io::{Read, Write}; diff --git a/src/meta_plugin/magic.rs b/src/meta_plugin/magic.rs deleted file mode 100644 index 7f5a37f..0000000 --- a/src/meta_plugin/magic.rs +++ /dev/null @@ -1,366 +0,0 @@ -use magic::{Cookie, CookieFlags}; -use std::io; - -use crate::common::PIPESIZE; - -use crate::meta_plugin::{MetaPlugin, MetaPluginType}; - -#[derive(Debug)] -pub struct MagicFileMetaPlugin { - buffer: Vec, - max_buffer_size: usize, - is_finalized: bool, - cookie: Option, - base: crate::meta_plugin::BaseMetaPlugin, -} - -impl MagicFileMetaPlugin { - pub fn new( - options: Option>, - outputs: Option>, - ) -> MagicFileMetaPlugin { - // Start with default options - let mut final_options = std::collections::HashMap::new(); - final_options.insert( - "max_buffer_size".to_string(), - serde_yaml::Value::Number(PIPESIZE.into()), - ); - if let Some(opts) = options { - for (key, value) in opts { - final_options.insert(key, value); - } - } - - // Start with default outputs - let mut final_outputs = std::collections::HashMap::new(); - let default_outputs = vec![ - "mime_type".to_string(), - "mime_encoding".to_string(), - "file_type".to_string(), - ]; - for output_name in default_outputs { - final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); - } - if let Some(outs) = outputs { - for (key, value) in outs { - final_outputs.insert(key, value); - } - } - - let max_buffer_size = final_options - .get("max_buffer_size") - .and_then(|v| v.as_u64()) - .unwrap_or(PIPESIZE as u64) as usize; - - // Ensure the default max_buffer_size is in the options - if !final_options.contains_key("max_buffer_size") { - final_options.insert( - "max_buffer_size".to_string(), - serde_yaml::Value::Number(PIPESIZE.into()), - ); - } - - let mut base = crate::meta_plugin::BaseMetaPlugin::new(); - base.outputs = final_outputs; - base.options = final_options; - - MagicFileMetaPlugin { - buffer: Vec::new(), - max_buffer_size, - is_finalized: false, - cookie: None, - base, - } - } - - fn get_magic_result(&self, flags: CookieFlags) -> io::Result { - // Use the existing cookie and just change flags - if let Some(cookie) = &self.cookie { - cookie.set_flags(flags).map_err(|e| { - io::Error::new( - io::ErrorKind::Other, - format!("Failed to set magic flags: {}", e), - ) - })?; - - let result = cookie.buffer(&self.buffer).map_err(|e| { - io::Error::new( - io::ErrorKind::Other, - format!("Failed to analyze buffer: {}", e), - ) - })?; - - // Clean up the result - remove extra whitespace and take first part if needed - let trimmed = result.trim(); - - // For some magic results, we might want just the first part before semicolon or comma - let cleaned = if trimmed.contains(';') { - trimmed.split(';').next().unwrap_or(trimmed).trim() - } else if trimmed.contains(',') - && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) - { - trimmed.split(',').next().unwrap_or(trimmed).trim() - } else { - trimmed - }; - - Ok(cleaned.to_string()) - } else { - Err(io::Error::new( - io::ErrorKind::Other, - "Magic cookie not initialized", - )) - } - } - - /// Helper function to process all magic types and collect metadata - fn process_magic_types(&self) -> Vec { - let mut metadata = Vec::new(); - - // Define the types to process with their corresponding flags - let types_to_process = [ - ("mime_type", CookieFlags::MIME_TYPE), - ("mime_encoding", CookieFlags::MIME_ENCODING), - ("file_type", CookieFlags::default()), - ]; - - for (name, flags) in types_to_process.iter() { - if let Ok(result) = self.get_magic_result(*flags) { - if !result.is_empty() { - // Use process_metadata_outputs to handle output mapping - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - name, - serde_yaml::Value::String(result), - self.base.outputs(), - ) { - metadata.push(meta_data); - } - } - } - } - - metadata - } -} - -impl MetaPlugin for MagicFileMetaPlugin { - /// Checks if the plugin has been finalized. - /// - /// # Returns - /// - /// `true` if finalized, `false` otherwise. - fn is_finalized(&self) -> bool { - self.is_finalized - } - - /// Sets the finalized state of the plugin. - /// - /// # Arguments - /// - /// * `finalized` - The new finalized state. - fn set_finalized(&mut self, finalized: bool) { - self.is_finalized = finalized; - } - - /// Initializes the magic cookie for file type detection. - /// - /// Loads the magic database; finalizes if initialization fails. - /// - /// # Returns - /// - /// A `MetaPluginResponse` with empty metadata; `is_finalized` is `true` on failure. - /// - /// # Errors - /// - /// Logs errors; returns finalized response on cookie or load failure. - /// - /// # Examples - /// - /// ``` - /// let mut plugin = MagicFileMetaPlugin::new(None, None); - /// let response = plugin.initialize(); - /// ``` - fn initialize(&mut self) -> crate::meta_plugin::MetaPluginResponse { - // Initialize the magic cookie once - let cookie = match Cookie::open(Default::default()) { - Ok(cookie) => cookie, - Err(_e) => { - return crate::meta_plugin::MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - }; - if let Err(_e) = cookie.load(&[] as &[&str]) { - return crate::meta_plugin::MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - self.cookie = Some(cookie); - - crate::meta_plugin::MetaPluginResponse { - metadata: Vec::new(), - is_finalized: false, - } - } - - /// Finalizes the plugin and performs file type detection. - /// - /// Analyzes the accumulated buffer and outputs detected types. - /// - /// # Returns - /// - /// A `MetaPluginResponse` with detection metadata and finalized state set to `true`. - /// - /// # Examples - /// - /// ``` - /// let mut plugin = MagicFileMetaPlugin::new(None, None); - /// // ... after updates - /// let response = plugin.finalize(); - /// assert!(response.is_finalized); - /// ``` - fn finalize(&mut self) -> crate::meta_plugin::MetaPluginResponse { - // If already finalized, don't process again - if self.is_finalized { - return crate::meta_plugin::MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - - let metadata = self.process_magic_types(); - - // Mark as finalized - self.is_finalized = true; - - crate::meta_plugin::MetaPluginResponse { - metadata, - is_finalized: true, - } - } - - /// Updates the plugin with new data, accumulating for analysis. - /// - /// Buffers data up to `max_buffer_size`; triggers detection when full. - /// - /// # Arguments - /// - /// * `data` - Content chunk to buffer. - /// - /// # Returns - /// - /// A `MetaPluginResponse` with metadata on buffer full; finalizes then. - /// - /// # Examples - /// - /// ``` - /// let mut plugin = MagicFileMetaPlugin::new(None, None); - /// let response = plugin.update(b"content"); - /// ``` - fn update(&mut self, data: &[u8]) -> crate::meta_plugin::MetaPluginResponse { - // If already finalized, don't process more data - if self.is_finalized { - return crate::meta_plugin::MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } - - let mut metadata = Vec::new(); - - // Only collect up to max_buffer_size - let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); - if remaining_capacity > 0 { - let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); - self.buffer.extend_from_slice(&data[..bytes_to_copy]); - - // Check if we've reached our buffer limit and return metadata - if self.buffer.len() >= self.max_buffer_size { - metadata = self.process_magic_types(); - - // Mark as finalized when we've processed enough data - self.is_finalized = true; - } - } - - let is_finalized = !metadata.is_empty(); - crate::meta_plugin::MetaPluginResponse { - metadata, - is_finalized, - } - } - - /// Returns the type of this meta plugin. - /// - /// # Returns - /// - /// `MetaPluginType::MagicFile`. - fn meta_type(&self) -> MetaPluginType { - MetaPluginType::MagicFile - } - - /// Returns a reference to the outputs mapping. - /// - /// # Returns - /// - /// A reference to the `HashMap` of outputs. - fn outputs(&self) -> &std::collections::HashMap { - self.base.outputs() - } - - /// Returns a mutable reference to the outputs mapping. - /// - /// # Returns - /// - /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut( - &mut self, - ) -> anyhow::Result<&mut std::collections::HashMap> { - Ok(self.base.outputs_mut()) - } - - /// Returns the default output names for this plugin. - /// - /// # Returns - /// - /// Vector of default output field names. - fn default_outputs(&self) -> Vec { - vec![ - "mime_type".to_string(), - "mime_encoding".to_string(), - "file_type".to_string(), - ] - } - - /// Returns a reference to the options mapping. - /// - /// # Returns - /// - /// A reference to the `HashMap` of options. - fn options(&self) -> &std::collections::HashMap { - self.base.options() - } - - /// Returns a mutable reference to the options mapping. - /// - /// # Returns - /// - /// A mutable reference to the `HashMap` of options. - fn options_mut( - &mut self, - ) -> anyhow::Result<&mut std::collections::HashMap> { - Ok(self.base.options_mut()) - } -} - -use crate::meta_plugin::register_meta_plugin; - -// Register the plugin at module initialization time -#[ctor::ctor] -fn register_magic_file_plugin() { - register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| { - Box::new(MagicFileMetaPlugin::new(options, outputs)) - }); -} diff --git a/src/tests/compression/lz4_tests.rs b/src/tests/compression/lz4_tests.rs index b01e833..b9cec15 100644 --- a/src/tests/compression/lz4_tests.rs +++ b/src/tests/compression/lz4_tests.rs @@ -1,4 +1,5 @@ #[cfg(test)] +#[cfg(feature = "lz4")] mod tests { use crate::compression_engine::lz4::CompressionEngineLZ4; use crate::tests::common::test_helpers::test_compression_engine;