diff --git a/Cargo.lock b/Cargo.lock index e986956..a58fa2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,6 +378,17 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -1020,6 +1031,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.1.9" @@ -1553,6 +1570,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "infer" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] + [[package]] name = "inventory" version = "0.3.22" @@ -1686,6 +1712,7 @@ dependencies = [ "http-body-util", "humansize", "hyper", + "infer", "inventory", "is-terminal", "jsonwebtoken", @@ -1726,6 +1753,7 @@ dependencies = [ "tokio-util", "tower", "tower-http", + "tree_magic_mini", "ureq", "utoipa", "utoipa-swagger-ui", @@ -1975,6 +2003,15 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -2141,6 +2178,17 @@ dependencies = [ "sha2 0.10.9", ] +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -3184,6 +3232,17 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tree_magic_mini" +version = "3.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8765b90061cba6c22b5831f675da109ae5561588290f9fa2317adab2714d5a6" +dependencies = [ + "memchr", + "nom", + "petgraph", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -3372,6 +3431,16 @@ dependencies = [ "zip", ] +[[package]] +name = "uuid" +version = "1.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "uzers" version = "0.12.2" diff --git a/Cargo.toml b/Cargo.toml index b23b741..2e38f1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,8 @@ log = "0.4" lz4_flex = { version = "0.12", optional = true } zstd = { version = "0.13", optional = true } magic = { version = "0.13", optional = true } +infer = { version = "0.19", optional = true } +tree_magic_mini = { version = "3.2", optional = true } nix = { version = "0.30", features = ["fs", "process"] } once_cell = "1.21" comfy-table = "7.2" @@ -83,11 +85,15 @@ tiktoken-rs = { version = "0.9", optional = true } [features] # Default features include core compression engines and swagger UI -default = ["magic", "lz4", "gzip", "client", "tokens"] - -# Full -#default = ["server", "magic", "lz4", "swagger"] - +default = [ + "client", + "gzip", + "infer", + "lz4", + "tokens", + "tree_magic_mini", + "zstd" +] # Server feature (includes axum and related dependencies) server = ["dep:axum", "dep:tower", "dep:tower-http", "dep:utoipa", "dep:jsonwebtoken"] @@ -100,11 +106,13 @@ xz = [] zstd = ["dep:zstd"] # Plugin features (meta and filter) -all-meta-plugins = 
["dep:magic"] +all-meta-plugins = ["magic", "infer", "tree_magic_mini"] all-filter-plugins = [] # Individual plugin features magic = ["dep:magic"] +infer = ["dep:infer"] +tree_magic_mini = ["dep:tree_magic_mini"] # Swagger UI feature swagger = ["dep:utoipa-swagger-ui"] diff --git a/bin/keep b/bin/keep new file mode 100755 index 0000000..d0bef82 Binary files /dev/null and b/bin/keep differ diff --git a/build-static.bash b/build-static.bash index 3c3da31..fccef19 100755 --- a/build-static.bash +++ b/build-static.bash @@ -2,7 +2,6 @@ set -ex -export RUSTFLAGS='-C target-feature=+crt-static' -cargo build --release --target x86_64-unknown-linux-gnu +cargo build --release --target x86_64-unknown-linux-musl mkdir -p bin -cp target/x86_64-unknown-linux-gnu/release/keep ./bin/ +cp target/x86_64-unknown-linux-musl/release/keep ./bin/ diff --git a/src/args.rs b/src/args.rs index e576e4f..c9b85d6 100644 --- a/src/args.rs +++ b/src/args.rs @@ -82,7 +82,7 @@ pub struct ModeArgs { pub generate_config: bool, #[arg(help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "update", "status", "server", "generate_config", "export", "import"]))] - #[arg(help("Generate shell completion script (bash, zsh, fish, elvish, powershell)"))] + #[arg(help("Generate shell completion script"))] pub generate_completion: Option, #[arg(help_heading("Server Options"), long, env("KEEP_SERVER_ADDRESS"))] diff --git a/src/compression_engine/gzip.rs b/src/compression_engine/gzip.rs index 70e6708..80fa973 100644 --- a/src/compression_engine/gzip.rs +++ b/src/compression_engine/gzip.rs @@ -11,12 +11,12 @@ use std::io::{Read, Write}; #[cfg(feature = "gzip")] use std::path::PathBuf; +#[cfg(feature = "gzip")] +use flate2::Compression; #[cfg(feature = "gzip")] use flate2::read::GzDecoder; #[cfg(feature = "gzip")] use flate2::write::GzEncoder; -#[cfg(feature = "gzip")] -use flate2::Compression; #[cfg(feature = "gzip")] use
crate::compression_engine::CompressionEngine; diff --git a/src/compression_engine/mod.rs b/src/compression_engine/mod.rs index 5253199..011fb90 100644 --- a/src/compression_engine/mod.rs +++ b/src/compression_engine/mod.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Result}; +use anyhow::{Result, anyhow}; use std::io; use std::io::{Read, Write}; use std::path::PathBuf; diff --git a/src/compression_engine/program.rs b/src/compression_engine/program.rs index bf99a65..506cd8e 100644 --- a/src/compression_engine/program.rs +++ b/src/compression_engine/program.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result, anyhow}; use log::*; use std::fs::File; use std::io::{Read, Write}; diff --git a/src/lib.rs b/src/lib.rs index 80b279e..c9235ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,14 @@ use crate::meta_plugin::magic_file; #[allow(unused_imports)] use crate::meta_plugin::tokens; +#[cfg(feature = "infer")] +#[allow(unused_imports)] +use crate::meta_plugin::infer_plugin; + +#[cfg(feature = "tree_magic_mini")] +#[allow(unused_imports)] +use crate::meta_plugin::tree_magic_mini; + /// Initializes plugins at library load time. 
///
 /// Plugin registration happens automatically via `#[ctor]` constructors
diff --git a/src/meta_plugin/infer_plugin.rs b/src/meta_plugin/infer_plugin.rs
new file mode 100644
index 0000000..9c7aac8
--- /dev/null
+++ b/src/meta_plugin/infer_plugin.rs
@@ -0,0 +1,192 @@
+//! MIME type detection meta plugin backed by the `infer` crate.
+//!
+//! Buffers the leading bytes of the stream and reports the MIME type that
+//! `infer::get` derives from them as the `infer_mime_type` metadata entry.
+
+use crate::common::PIPESIZE;
+use crate::meta_plugin::{
+    BaseMetaPlugin, MetaPlugin, MetaPluginResponse, MetaPluginType, process_metadata_outputs,
+    register_meta_plugin,
+};
+
+/// Name of the metadata entry produced by this plugin.
+const OUTPUT_NAME: &str = "infer_mime_type";
+
+/// MIME type reported when `infer` does not recognise the buffered bytes.
+const FALLBACK_MIME_TYPE: &str = "application/octet-stream";
+
+/// Meta plugin that sniffs the MIME type of the stream from its leading bytes.
+#[derive(Debug, Default)]
+pub struct InferMetaPlugin {
+    /// Leading bytes of the stream, capped at `max_buffer_size`.
+    buffer: Vec<u8>,
+    /// Upper bound on the number of bytes kept in `buffer`.
+    max_buffer_size: usize,
+    /// Set once the MIME type has been emitted.
+    is_finalized: bool,
+    /// Option and output-mapping storage (`BaseMetaPlugin`).
+    base: BaseMetaPlugin,
+}
+
+impl InferMetaPlugin {
+    /// Creates the plugin from optional user-supplied options and output mappings.
+    ///
+    /// The `max_buffer_size` option (an unsigned integer) caps how many leading bytes are
+    /// buffered; it defaults to `PIPESIZE`. Entries in `outputs` are applied on top of the
+    /// default `infer_mime_type` mapping.
+    pub fn new(
+        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
+        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
+    ) -> InferMetaPlugin {
+        let mut base = BaseMetaPlugin::new();
+
+        if let Some(opts) = options {
+            for (key, value) in opts {
+                base.options.insert(key, value);
+            }
+        }
+
+        let configured = base
+            .options
+            .get("max_buffer_size")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(PIPESIZE as u64);
+        // Saturate rather than silently truncate where `usize` is narrower than `u64`.
+        let max_buffer_size = usize::try_from(configured).unwrap_or(usize::MAX);
+
+        base.outputs.insert(
+            OUTPUT_NAME.to_string(),
+            serde_yaml::Value::String(OUTPUT_NAME.to_string()),
+        );
+
+        if let Some(outs) = outputs {
+            for (key, value) in outs {
+                base.outputs.insert(key, value);
+            }
+        }
+
+        InferMetaPlugin {
+            buffer: Vec::new(),
+            max_buffer_size,
+            is_finalized: false,
+            base,
+        }
+    }
+
+    /// Response returned while buffering, or (with `is_finalized` set) once the result
+    /// has already been emitted.
+    fn empty_response(is_finalized: bool) -> MetaPluginResponse {
+        MetaPluginResponse {
+            metadata: Vec::new(),
+            is_finalized,
+        }
+    }
+
+    /// Detects the MIME type of the buffered bytes, marks the plugin as finalized and
+    /// returns the final response. Shared by `update` and `finalize`.
+    fn emit_mime_type(&mut self) -> MetaPluginResponse {
+        let mime_type = infer::get(&self.buffer)
+            .map(|kind| kind.mime_type().to_string())
+            .unwrap_or_else(|| FALLBACK_MIME_TYPE.to_string());
+
+        self.is_finalized = true;
+
+        let metadata = process_metadata_outputs(
+            OUTPUT_NAME,
+            serde_yaml::Value::String(mime_type),
+            self.base.outputs(),
+        )
+        .map(|m| vec![m])
+        .unwrap_or_default();
+
+        MetaPluginResponse {
+            metadata,
+            is_finalized: true,
+        }
+    }
+}
+
+impl MetaPlugin for InferMetaPlugin {
+    fn meta_type(&self) -> MetaPluginType {
+        MetaPluginType::Infer
+    }
+
+    fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+
+    fn set_finalized(&mut self, finalized: bool) {
+        self.is_finalized = finalized;
+    }
+
+    fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
+        self.base.set_save_meta(save_meta);
+    }
+
+    fn save_meta(&self, name: &str, value: &str) {
+        self.base.save_meta(name, value);
+    }
+
+    /// Buffers up to `max_buffer_size` leading bytes; emits the MIME type as soon as the
+    /// buffer is full.
+    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        let remaining = self.max_buffer_size.saturating_sub(self.buffer.len());
+        let to_add = &data[..data.len().min(remaining)];
+        self.buffer.extend_from_slice(to_add);
+
+        if self.buffer.len() >= self.max_buffer_size {
+            return self.emit_mime_type();
+        }
+
+        Self::empty_response(false)
+    }
+
+    /// Emits the MIME type for whatever was buffered if `update` has not already done so.
+    fn finalize(&mut self) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        self.emit_mime_type()
+    }
+
+    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
+        self.base.outputs()
+    }
+
+    fn outputs_mut(
+        &mut self,
+    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
+        Ok(self.base.outputs_mut())
+    }
+
+    fn default_outputs(&self) -> Vec<String> {
+        vec![OUTPUT_NAME.to_string()]
+    }
+
+    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
+        self.base.options()
+    }
+
+    fn options_mut(
+        &mut self,
+    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
+        Ok(self.base.options_mut())
+    }
+
+    fn parallel_safe(&self) -> bool {
+        true
+    }
+}
+
+#[ctor::ctor]
+fn register_infer_plugin() {
+    register_meta_plugin(MetaPluginType::Infer, |options, outputs| {
+        Box::new(InferMetaPlugin::new(options, outputs))
+    })
+    .expect("Failed to register InferMetaPlugin");
+}
diff --git a/src/meta_plugin/mod.rs b/src/meta_plugin/mod.rs
index 4a044e8..3e1f7d2 100644
--- a/src/meta_plugin/mod.rs
+++ b/src/meta_plugin/mod.rs
@@ -9,6 +9,8 @@ pub mod digest;
 pub mod env;
 pub mod exec;
 pub mod hostname;
+#[cfg(feature = "infer")]
+pub mod infer_plugin;
 pub mod keep_pid;
 pub mod magic_file;
 pub mod read_rate;
@@ -18,6 +20,8 @@ pub mod shell_pid;
 pub mod text;
 #[cfg(feature = "tokens")]
 pub mod tokens;
+#[cfg(feature = "tree_magic_mini")]
+pub mod tree_magic_mini;
 pub mod user;
 
 pub use digest::DigestMetaPlugin;
@@ -28,11 +32,15 @@ pub use magic_file::MagicFileMetaPlugin;
 pub use cwd::CwdMetaPlugin;
 pub use env::EnvMetaPlugin;
 pub use hostname::HostnameMetaPlugin;
+#[cfg(feature = "infer")]
+pub use infer_plugin::InferMetaPlugin;
 pub use keep_pid::KeepPidMetaPlugin;
 pub use read_rate::ReadRateMetaPlugin;
 pub use read_time::ReadTimeMetaPlugin;
 pub use shell::ShellMetaPlugin;
 pub use shell_pid::ShellPidMetaPlugin;
+#[cfg(feature = "tree_magic_mini")]
+pub use tree_magic_mini::TreeMagicMiniMetaPlugin;
 pub use user::UserMetaPlugin;
 
 #[cfg(not(feature = "magic"))]
@@ -263,6 +271,8 @@ pub enum MetaPluginType {
     Exec,
     Env,
     Tokens,
+    TreeMagicMini,
+    Infer,
 }
 
 /// Central function to handle metadata output with name mapping.
diff --git a/src/meta_plugin/tree_magic_mini.rs b/src/meta_plugin/tree_magic_mini.rs
new file mode 100644
index 0000000..ffbe2d2
--- /dev/null
+++ b/src/meta_plugin/tree_magic_mini.rs
@@ -0,0 +1,188 @@
+//! MIME type detection meta plugin backed by the `tree_magic_mini` crate.
+//!
+//! Buffers the leading bytes of the stream and reports the MIME type that
+//! `tree_magic_mini::from_u8` derives from them as the `tree_magic_mime_type`
+//! metadata entry.
+
+use crate::common::PIPESIZE;
+use crate::meta_plugin::{
+    BaseMetaPlugin, MetaPlugin, MetaPluginResponse, MetaPluginType, process_metadata_outputs,
+    register_meta_plugin,
+};
+
+/// Name of the metadata entry produced by this plugin.
+const OUTPUT_NAME: &str = "tree_magic_mime_type";
+
+/// Meta plugin that sniffs the MIME type of the stream from its leading bytes.
+#[derive(Debug, Default)]
+pub struct TreeMagicMiniMetaPlugin {
+    /// Leading bytes of the stream, capped at `max_buffer_size`.
+    buffer: Vec<u8>,
+    /// Upper bound on the number of bytes kept in `buffer`.
+    max_buffer_size: usize,
+    /// Set once the MIME type has been emitted.
+    is_finalized: bool,
+    /// Option and output-mapping storage (`BaseMetaPlugin`).
+    base: BaseMetaPlugin,
+}
+
+impl TreeMagicMiniMetaPlugin {
+    /// Creates the plugin from optional user-supplied options and output mappings.
+    ///
+    /// The `max_buffer_size` option (an unsigned integer) caps how many leading bytes are
+    /// buffered; it defaults to `PIPESIZE`. Entries in `outputs` are applied on top of the
+    /// default `tree_magic_mime_type` mapping.
+    pub fn new(
+        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
+        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
+    ) -> TreeMagicMiniMetaPlugin {
+        let mut base = BaseMetaPlugin::new();
+
+        if let Some(opts) = options {
+            for (key, value) in opts {
+                base.options.insert(key, value);
+            }
+        }
+
+        let configured = base
+            .options
+            .get("max_buffer_size")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(PIPESIZE as u64);
+        // Saturate rather than silently truncate where `usize` is narrower than `u64`.
+        let max_buffer_size = usize::try_from(configured).unwrap_or(usize::MAX);
+
+        base.outputs.insert(
+            OUTPUT_NAME.to_string(),
+            serde_yaml::Value::String(OUTPUT_NAME.to_string()),
+        );
+
+        if let Some(outs) = outputs {
+            for (key, value) in outs {
+                base.outputs.insert(key, value);
+            }
+        }
+
+        TreeMagicMiniMetaPlugin {
+            buffer: Vec::new(),
+            max_buffer_size,
+            is_finalized: false,
+            base,
+        }
+    }
+
+    /// Response returned while buffering, or (with `is_finalized` set) once the result
+    /// has already been emitted.
+    fn empty_response(is_finalized: bool) -> MetaPluginResponse {
+        MetaPluginResponse {
+            metadata: Vec::new(),
+            is_finalized,
+        }
+    }
+
+    /// Detects the MIME type of the buffered bytes, marks the plugin as finalized and
+    /// returns the final response. Shared by `update` and `finalize`.
+    fn emit_mime_type(&mut self) -> MetaPluginResponse {
+        let mime_type = tree_magic_mini::from_u8(&self.buffer);
+
+        self.is_finalized = true;
+
+        let metadata = process_metadata_outputs(
+            OUTPUT_NAME,
+            serde_yaml::Value::String(mime_type.to_string()),
+            self.base.outputs(),
+        )
+        .map(|m| vec![m])
+        .unwrap_or_default();
+
+        MetaPluginResponse {
+            metadata,
+            is_finalized: true,
+        }
+    }
+}
+
+impl MetaPlugin for TreeMagicMiniMetaPlugin {
+    fn meta_type(&self) -> MetaPluginType {
+        MetaPluginType::TreeMagicMini
+    }
+
+    fn is_finalized(&self) -> bool {
+        self.is_finalized
+    }
+
+    fn set_finalized(&mut self, finalized: bool) {
+        self.is_finalized = finalized;
+    }
+
+    fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
+        self.base.set_save_meta(save_meta);
+    }
+
+    fn save_meta(&self, name: &str, value: &str) {
+        self.base.save_meta(name, value);
+    }
+
+    /// Buffers up to `max_buffer_size` leading bytes; emits the MIME type as soon as the
+    /// buffer is full.
+    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        let remaining = self.max_buffer_size.saturating_sub(self.buffer.len());
+        let to_add = &data[..data.len().min(remaining)];
+        self.buffer.extend_from_slice(to_add);
+
+        if self.buffer.len() >= self.max_buffer_size {
+            return self.emit_mime_type();
+        }
+
+        Self::empty_response(false)
+    }
+
+    /// Emits the MIME type for whatever was buffered if `update` has not already done so.
+    fn finalize(&mut self) -> MetaPluginResponse {
+        if self.is_finalized {
+            return Self::empty_response(true);
+        }
+
+        self.emit_mime_type()
+    }
+
+    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
+        self.base.outputs()
+    }
+
+    fn outputs_mut(
+        &mut self,
+    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
+        Ok(self.base.outputs_mut())
+    }
+
+    fn default_outputs(&self) -> Vec<String> {
+        vec![OUTPUT_NAME.to_string()]
+    }
+
+    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
+        self.base.options()
+    }
+
+    fn options_mut(
+        &mut self,
+    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
+        Ok(self.base.options_mut())
+    }
+
+    fn parallel_safe(&self) -> bool {
+        true
+    }
+}
+
+#[ctor::ctor]
+fn register_tree_magic_mini_plugin() {
+    register_meta_plugin(MetaPluginType::TreeMagicMini, |options, outputs| {
+        Box::new(TreeMagicMiniMetaPlugin::new(options, outputs))
+    })
+    .expect("Failed to register TreeMagicMiniMetaPlugin");
+}
diff --git a/src/services/compression_service.rs
b/src/services/compression_service.rs
index 775e6f1..0976b32 100644
--- a/src/services/compression_service.rs
+++ b/src/services/compression_service.rs
@@ -1,4 +1,4 @@
-use crate::compression_engine::{get_compression_engine, CompressionType};
+use crate::compression_engine::{CompressionType, get_compression_engine};
 use crate::services::error::CoreError;
 use anyhow::anyhow;
 use std::io::{Read, Write};
@@ -187,8 +187,8 @@ impl CompressionService {
     ) -> Box {
         match compression {
             CompressionType::GZip => {
-                use flate2::write::GzEncoder;
                 use flate2::Compression;
+                use flate2::write::GzEncoder;
                 Box::new(GzEncoder::new(writer, Compression::default()))
             }
             CompressionType::LZ4 => Box::new(lz4_flex::frame::FrameEncoder::new(writer)),
diff --git a/src/tests/compression/gzip_tests.rs b/src/tests/compression/gzip_tests.rs
index dbd0ed2..14fb2f4 100644
--- a/src/tests/compression/gzip_tests.rs
+++ b/src/tests/compression/gzip_tests.rs
@@ -1,7 +1,7 @@
 #[cfg(test)]
 mod tests {
-    use crate::compression_engine::gzip::CompressionEngineGZip;
     use crate::compression_engine::CompressionEngine;
+    use crate::compression_engine::gzip::CompressionEngineGZip;
     use crate::tests::common::test_helpers::test_compression_engine;
 
     #[test]
diff --git a/src/tests/meta_plugin/infer_tests.rs b/src/tests/meta_plugin/infer_tests.rs
new file mode 100644
index 0000000..54c2e98
--- /dev/null
+++ b/src/tests/meta_plugin/infer_tests.rs
@@ -0,0 +1,58 @@
+#[cfg(test)]
+mod tests {
+    use crate::meta_plugin::MetaPlugin;
+    use crate::meta_plugin::infer_plugin::InferMetaPlugin;
+
+    #[test]
+    fn test_infer_meta_plugin() {
+        let plugin = InferMetaPlugin::new(None, None);
+
+        assert_eq!(
+            plugin.meta_type(),
+            crate::meta_plugin::MetaPluginType::Infer
+        );
+        assert!(plugin.is_internal());
+    }
+
+    #[test]
+    fn test_infer_png_detection() {
+        let png_header: &[u8] = &[
+            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48,
+            0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, 0x00, 0x00,
+            0x00, 0x90, 0x77, 0x53, 0xDE,
+        ];
+        let mut plugin = InferMetaPlugin::new(None, None);
+        plugin.update(png_header);
+        let response = plugin.finalize();
+
+        assert!(response.is_finalized);
+        assert!(!response.metadata.is_empty());
+        assert_eq!(response.metadata[0].name, "infer_mime_type");
+        assert_eq!(response.metadata[0].value, "image/png");
+    }
+
+    #[test]
+    fn test_infer_empty_input_falls_back_to_octet_stream() {
+        let mut plugin = InferMetaPlugin::new(None, None);
+        let response = plugin.finalize();
+
+        assert!(response.is_finalized);
+        assert!(!response.metadata.is_empty());
+        assert_eq!(response.metadata[0].name, "infer_mime_type");
+        assert_eq!(response.metadata[0].value, "application/octet-stream");
+    }
+
+    #[test]
+    fn test_infer_ignores_data_after_finalize() {
+        let mut plugin = InferMetaPlugin::new(None, None);
+        plugin.finalize();
+
+        let late_update = plugin.update(b"late data");
+        assert!(late_update.is_finalized);
+        assert!(late_update.metadata.is_empty());
+
+        let second_finalize = plugin.finalize();
+        assert!(second_finalize.is_finalized);
+        assert!(second_finalize.metadata.is_empty());
+    }
+}
diff --git a/src/tests/meta_plugin/mod.rs b/src/tests/meta_plugin/mod.rs
index 734ec83..0b4323b 100644
--- a/src/tests/meta_plugin/mod.rs
+++ b/src/tests/meta_plugin/mod.rs
@@ -2,3 +2,11 @@
 
 #[cfg(test)]
 pub mod digest_tests;
+
+#[cfg(feature = "infer")]
+#[cfg(test)]
+pub mod infer_tests;
+
+#[cfg(feature = "tree_magic_mini")]
+#[cfg(test)]
+pub mod tree_magic_mini_tests;
diff --git a/src/tests/meta_plugin/tree_magic_mini_tests.rs b/src/tests/meta_plugin/tree_magic_mini_tests.rs
new file mode 100644
index 0000000..540cfe3
--- /dev/null
+++ b/src/tests/meta_plugin/tree_magic_mini_tests.rs
@@ -0,0 +1,47 @@
+#[cfg(test)]
+mod tests {
+    use crate::meta_plugin::MetaPlugin;
+    use crate::meta_plugin::tree_magic_mini::TreeMagicMiniMetaPlugin;
+
+    #[test]
+    fn test_tree_magic_mini_meta_plugin() {
+        let plugin = TreeMagicMiniMetaPlugin::new(None, None);
+
+        assert_eq!(
+            plugin.meta_type(),
+            crate::meta_plugin::MetaPluginType::TreeMagicMini
+        );
+        assert!(plugin.is_internal());
+    }
+
+    #[test]
+    fn test_tree_magic_mini_png_detection() {
+        let png_header: &[u8] = &[
+            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48,
+            0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, 0x00, 0x00,
+            0x00, 0x90, 0x77, 0x53, 0xDE,
+        ];
+        let mut plugin = TreeMagicMiniMetaPlugin::new(None, None);
+        plugin.update(png_header);
+        let response = plugin.finalize();
+
+        assert!(response.is_finalized);
+        assert!(!response.metadata.is_empty());
+        assert_eq!(response.metadata[0].name, "tree_magic_mime_type");
+        assert_eq!(response.metadata[0].value, "image/png");
+    }
+
+    #[test]
+    fn test_tree_magic_mini_ignores_data_after_finalize() {
+        let mut plugin = TreeMagicMiniMetaPlugin::new(None, None);
+        plugin.finalize();
+
+        let late_update = plugin.update(b"late data");
+        assert!(late_update.is_finalized);
+        assert!(late_update.metadata.is_empty());
+
+        let second_finalize = plugin.finalize();
+        assert!(second_finalize.is_finalized);
+        assert!(second_finalize.metadata.is_empty());
+    }
+}