From b166477202c59a302e9e8bf37a4891bd9dbe2d44 Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Fri, 13 Mar 2026 07:57:36 -0300 Subject: [PATCH] fix: harden security, eliminate panics, remove dead code, add Dockerfile Security: - Use constant-time password comparison (subtle crate) to prevent timing attacks - Replace permissive CORS with configurable origin-restricted CORS - Add TLS warning when password auth is used without HTTPS Bug fixes: - Convert MetaPlugin panics to anyhow::Result (get_meta_plugin, outputs_mut, options_mut) - Replace item.id.unwrap() with proper error handling across 15 call sites - Fix panic on unknown column type in list mode - Fix conflicting PIPESIZE constant (was 8192 vs 65536, now unified to 8192) - Add 256MB filter chain buffer limit to prevent OOM - Gracefully skip unregistered plugins instead of panicking Dead code removal: - Delete unused filter parser files (filter_parser.rs, filter.pest, parser/ module) - ~260 lines of dead PEG parser code removed Code consolidation: - Add is_content_binary_from_metadata() helper (was duplicated in 4 places) - Simplify save_item_raw() to delegate to save_item_raw_streaming() (~90 lines removed) Incomplete features: - Populate filter_plugins in status output from global registry - Add FallbackMagicFileMetaPlugin (was referenced but never implemented) - Document init_plugins() as intentional no-op Infrastructure: - Add Dockerfile (static musl binary on scratch, 4.8MB) - Add .dockerignore - Add cors_origin to ServerConfig and config.rs --- .dockerignore | 5 + Cargo.lock | 1 + Cargo.toml | 1 + Dockerfile | 36 ++++++ src/common/is_binary.rs | 22 ++++ src/common/status.rs | 29 ++++- src/config.rs | 5 + src/filter.pest | 47 ------- src/filter_parser.rs | 131 ------------------- src/filter_plugin/mod.rs | 28 +++++ src/lib.rs | 13 +- src/meta_plugin/cwd.rs | 12 +- src/meta_plugin/digest.rs | 12 +- src/meta_plugin/env.rs | 12 +- src/meta_plugin/exec.rs | 12 +- src/meta_plugin/hostname.rs | 12 +- src/meta_plugin/keep_pid.rs | 12 +- src/meta_plugin/magic.rs | 113 ++++++++++------- src/meta_plugin/magic_file.rs | 196 ++++++++++++++--------------- src/meta_plugin/mod.rs | 48 ++++--- src/meta_plugin/read_rate.rs | 12 +- src/meta_plugin/read_time.rs | 12 +- src/meta_plugin/shell.rs | 12 +- src/meta_plugin/shell_pid.rs | 12 +- src/meta_plugin/text.rs | 12 +- src/meta_plugin/user.rs | 12 +- src/modes/common.rs | 14 ++- src/modes/generate_config.rs | 2 + src/modes/get.rs | 4 +- src/modes/info.rs | 6 +- src/modes/list.rs | 8 +- src/modes/server/api/item.rs | 20 ++- src/modes/server/common.rs | 21 +++- src/modes/server/mod.rs | 42 ++++++- src/modes/status.rs | 14 ++- src/modes/status_plugins.rs | 4 +- src/parser/filter.pest | 30 ----- src/parser/filter_parser.rs | 119 ------------------ src/parser/mod.rs | 15 --- src/services/async_item_service.rs | 9 +- src/services/item_service.rs | 20 +-- src/services/meta_service.rs | 10 +- src/services/sync_data_service.rs | 91 ++------------ 43 files changed, 561 insertions(+), 687 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile delete mode 100644 src/filter.pest delete mode 100644 src/filter_parser.rs delete mode 100644 src/parser/filter.pest delete mode 100644 src/parser/filter_parser.rs delete mode 100644 src/parser/mod.rs diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f827acb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +target/ +.git/ +*.db +keep.db +bin/ diff --git a/Cargo.lock b/Cargo.lock index 1f90f25..2e93328 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1575,6 +1575,7 @@ dependencies = [ "stderrlog", "strip-ansi-escapes", "strum", + "subtle", "tempfile", "term", "thiserror 1.0.69", diff --git a/Cargo.toml b/Cargo.toml index 7763ecb..08d6126 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ serde_json = "1.0.142" serde_yaml = "0.9.34" sha2 = "0.10.0" md5 = "0.7.0" +subtle = "2.6" stderrlog = "0.6.0" strum = { version = "0.27.2", features = ["derive"] } term = "1.1.0" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a245e5c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# Build stage +FROM rust:1.88-slim AS builder + +RUN apt-get update && apt-get install -y --no-install-recommends \ + cmake \ + make \ + gcc \ + musl-tools \ + pkg-config \ + && rm -rf /var/lib/apt/lists/* + +RUN rustup target add x86_64-unknown-linux-musl + +WORKDIR /app + +# Copy manifests and fetch dependencies (cached layer) +COPY Cargo.toml Cargo.lock ./ +RUN mkdir src && echo 'fn main() {}' > src/main.rs && echo '' > src/lib.rs + +RUN cargo fetch --target x86_64-unknown-linux-musl + +# Copy real source and build static binary +# magic feature excluded (requires shared libmagic; fallback uses `file` command) +COPY src/ src/ +RUN cargo build --release --target x86_64-unknown-linux-musl \ + --no-default-features --features lz4,gzip \ + && strip target/x86_64-unknown-linux-musl/release/keep + +# Runtime stage - scratch since binary is fully static +FROM scratch + +COPY --from=builder /app/target/x86_64-unknown-linux-musl/release/keep /keep + +EXPOSE 21080 + +ENTRYPOINT ["/keep"] diff --git a/src/common/is_binary.rs b/src/common/is_binary.rs index dfb93d0..60364b8 100644 --- a/src/common/is_binary.rs +++ b/src/common/is_binary.rs @@ -229,3 +229,25 @@ fn calculate_printable_ratio(data: &[u8]) -> f64 { printable_count as f64 / data.len() as f64 } + +/// Check if content is binary, using metadata as a fast path. +/// +/// First checks for a "text" metadata field: +/// - "false" means binary +/// - "true" means text +/// - Absent or other values fall back to byte sampling +/// +/// # Arguments +/// +/// * `metadata` - Key-value metadata map (e.g., from `meta_as_map()`) +/// * `data` - Byte sample to analyze if metadata is inconclusive +pub fn is_content_binary_from_metadata( + metadata: &std::collections::HashMap, + data: &[u8], +) -> bool { + if let Some(text_val) = metadata.get("text") { + text_val == "false" + } else { + is_binary(data) + } +} diff --git a/src/common/status.rs b/src/common/status.rs index 3d5ce6d..588da9f 100644 --- a/src/common/status.rs +++ b/src/common/status.rs @@ -134,9 +134,16 @@ pub fn generate_status_info( for meta_plugin_type in sorted_meta_plugins { log::debug!("STATUS: Processing meta plugin type: {meta_plugin_type:?}"); - log::debug!("STATUS: About to call get_meta_plugin"); - let meta_plugin = crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None); - log::debug!("STATUS: Created meta plugin instance"); + let meta_plugin = + match crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None) { + Ok(p) => p, + Err(e) => { + log::warn!( + "STATUS: Skipping unregistered meta plugin {meta_plugin_type:?}: {e}" + ); + continue; + } + }; // Get meta name first to avoid borrowing issues log::debug!("STATUS: Getting meta name..."); @@ -175,12 +182,26 @@ pub fn generate_status_info( ); } + // Populate filter plugin info from the global registry + let filter_plugins_map = crate::services::filter_service::get_available_filter_plugins(); + let filter_plugins_info: Vec = filter_plugins_map + .into_iter() + .map(|(name, creator)| { + let plugin = creator(); + FilterPluginInfo { + name: name.clone(), + options: plugin.options(), + description: format!("{name} filter plugin"), + } + }) + .collect(); + StatusInfo { paths: path_info, compression: compression_info, meta_plugins: meta_plugins_map, enabled_meta_plugins: enabled_meta_plugins_vec, - filter_plugins: Vec::new(), + filter_plugins: filter_plugins_info, configured_meta_plugins: None, } } diff --git a/src/config.rs b/src/config.rs index 6ec7c52..d062a07 100644 --- a/src/config.rs +++ b/src/config.rs @@ -148,6 +148,7 @@ pub struct ServerConfig { pub password_hash: Option, pub cert_file: Option, pub key_file: Option, + pub cors_origin: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] @@ -502,6 +503,10 @@ impl Settings { self.server.as_ref().and_then(|s| s.key_file.clone()) } + pub fn server_cors_origin(&self) -> Option { + self.server.as_ref().and_then(|s| s.cors_origin.clone()) + } + pub fn compression(&self) -> Option { self.compression_plugin.as_ref().map(|c| c.name.clone()) } diff --git a/src/filter.pest b/src/filter.pest deleted file mode 100644 index 9d85022..0000000 --- a/src/filter.pest +++ /dev/null @@ -1,47 +0,0 @@ -# This Pest grammar defines the syntax for filter chains used in the Keep application. -# Filters can be chained with commas and may have named or unnamed options with JSON-like values. - -WHITESPACE = _{ " " | "\t" | "\n" | "\r" } - -# Top-level rule for parsing multiple filters separated by commas. -filters = { filter ~ ("," ~ filters)? } - -# A single filter consisting of a name optionally followed by parenthesized options. -filter = { filter_name ~ ("(" ~ options ~ ")")? } - -# The name of a filter, starting with an ASCII letter followed by alphanumeric characters or underscores. -filter_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* } - -# A list of comma-separated options within parentheses. -options = { option ~ ("," ~ options)? } - -# A single option, optionally with a name followed by an equals sign and a value. -option = { (option_name ~ "=")? ~ option_value } - -# The name of an option, starting with an ASCII letter followed by alphanumeric characters or underscores. -option_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* } - -# The value of an option, which can be a JSON number, string, or boolean. -option_value = { - JSON_NUMBER | - JSON_STRING | - JSON_BOOLEAN -} - -# JSON number format supporting integers, decimals, and scientific notation. -JSON_NUMBER = @{ - ("-")? ~ - ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ - ("." ~ ASCII_DIGIT*)? ~ - (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)? -} - -# JSON string format with escaped characters. -JSON_STRING = ${ - "\"" ~ - (("\\" ~ ANY) | (!("\"" | "\\") ~ ANY))* ~ - "\"" -} - -# JSON boolean values: true or false. -JSON_BOOLEAN = ${ "true" | "false" } diff --git a/src/filter_parser.rs b/src/filter_parser.rs deleted file mode 100644 index 30b0c0a..0000000 --- a/src/filter_parser.rs +++ /dev/null @@ -1,131 +0,0 @@ -use pest::Parser; -use pest_derive::Parser; -use std::collections::HashMap; - -#[derive(Parser)] -#[grammar = "filter.pest"] -pub struct FilterParser; - -#[derive(Debug)] -pub struct Filter { - pub name: String, - pub options: HashMap, -} - -pub fn parse_filter_string(input: &str) -> Result, Box> { - let mut filters = Vec::new(); - let pairs = FilterParser::parse(Rule::filters, input)?; - - for pair in pairs { - if pair.as_rule() == Rule::filter { - let mut name = String::new(); - let mut options = HashMap::new(); - - for inner_pair in pair.into_inner() { - match inner_pair.as_rule() { - Rule::filter_name => { - name = inner_pair.as_str().to_string(); - } - Rule::options => { - for option_pair in inner_pair.into_inner() { - if option_pair.as_rule() == Rule::option { - let mut option_name = None; - let mut option_value = None; - - for option_inner in option_pair.into_inner() { - match option_inner.as_rule() { - Rule::option_name => { - option_name = Some(option_inner.as_str().to_string()); - } - Rule::option_value => { - option_value = Some(parse_option_value(option_inner.as_str())?); - } - _ => {} - } - } - - if let Some(value) = option_value { - // If no name is provided, use the filter name as the key - let key = option_name.unwrap_or_else(|| name.clone()); - options.insert(key, value); - } - } - } - } - _ => {} - } - } - - filters.push(Filter { name, options }); - } - } - - Ok(filters) -} - -fn parse_option_value(input: &str) -> Result> { - // Try to parse as number - if let Ok(num) = input.parse::() { - return Ok(serde_json::Value::Number(num.into())); - } - if let Ok(num) = input.parse::() { - if let Some(number) = serde_json::Number::from_f64(num) { - return Ok(serde_json::Value::Number(number)); - } - } - - // Try to parse as boolean - if let Ok(boolean) = input.parse::() { - return Ok(serde_json::Value::Bool(boolean)); - } - - // Treat as string (remove quotes if present) - let value = if input.starts_with('"') && input.ends_with('"') { - input[1..input.len()-1].to_string() - } else if input.starts_with('\'') && input.ends_with('\'') { - input[1..input.len()-1].to_string() - } else { - input.to_string() - }; - - Ok(serde_json::Value::String(value)) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_simple_filter() { - let result = parse_filter_string("grep").unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "grep"); - assert!(result[0].options.is_empty()); - } - - #[test] - fn test_parse_filter_with_options() { - let result = parse_filter_string("head_lines(10)").unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "head_lines"); - assert_eq!(result[0].options["head_lines"], 10); - } - - #[test] - fn test_parse_filter_with_named_options() { - let result = parse_filter_string("grep(pattern=\"error\")").unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "grep"); - assert_eq!(result[0].options["pattern"], "error"); - } - - #[test] - fn test_parse_multiple_filters() { - let result = parse_filter_string("head_lines(10), grep(pattern=\"error\")").unwrap(); - assert_eq!(result.len(), 2); - assert_eq!(result[0].name, "head_lines"); - assert_eq!(result[0].options["head_lines"], 10); - assert_eq!(result[1].name, "grep"); - assert_eq!(result[1].options["pattern"], "error"); - } -} diff --git a/src/filter_plugin/mod.rs b/src/filter_plugin/mod.rs index 589708e..579a09e 100644 --- a/src/filter_plugin/mod.rs +++ b/src/filter_plugin/mod.rs @@ -194,6 +194,10 @@ pub enum FilterType { StripAnsi, } +/// Maximum buffer size (256 MB) for filter chain intermediate results. +/// Prevents OOM on large files by rejecting inputs that exceed this limit. +const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024; + /// A chain of filter plugins applied sequentially. /// /// Chains multiple filters, applying them in order to the input stream. @@ -334,6 +338,18 @@ impl FilterChain { let mut current_data = Vec::new(); std::io::copy(reader, &mut current_data)?; + if current_data.len() > MAX_FILTER_BUFFER_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \ + Consider using fewer filter plugins or smaller inputs.", + current_data.len(), + MAX_FILTER_BUFFER_SIZE + ), + )); + } + // Store the plugins length to avoid borrowing issues let plugins_len = self.plugins.len(); @@ -348,6 +364,18 @@ impl FilterChain { // For intermediate plugins, write to a buffer let mut output_vec = Vec::new(); self.plugins[i].filter(&mut input, &mut output_vec)?; + + if output_vec.len() > MAX_FILTER_BUFFER_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!( + "Filter output size ({} bytes) exceeds maximum filter buffer size ({} bytes).", + output_vec.len(), + MAX_FILTER_BUFFER_SIZE + ), + )); + } + current_data = output_vec; } } diff --git a/src/lib.rs b/src/lib.rs index 2f9c997..4e2a868 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,8 +62,13 @@ use crate::meta_plugin::magic_file; /// Initializes plugins at library load time. /// -/// Ensures all filter and meta plugins are registered via their ctors. -/// Call this early in application startup if needed (though ctors handle most cases). +/// Plugin registration happens automatically via `#[ctor]` constructors +/// when each plugin module is loaded. The explicit module imports in +/// `lib.rs` guarantee this happens at library initialization time. +/// +/// This function exists as a public API entry point for callers that +/// want to explicitly ensure plugins are ready. It intentionally does +/// no additional work. /// /// # Examples /// @@ -71,8 +76,8 @@ use crate::meta_plugin::magic_file; /// keep::init_plugins(); /// ``` pub fn init_plugins() { - // This will be expanded in Step 3 implementation - // For now, the ctors handle registration + // Plugins self-register via #[ctor] on module load. + // The use-statements in lib.rs guarantee module inclusion. } #[cfg(test)] diff --git a/src/meta_plugin/cwd.rs b/src/meta_plugin/cwd.rs index 9cd0328..a63c95c 100644 --- a/src/meta_plugin/cwd.rs +++ b/src/meta_plugin/cwd.rs @@ -105,16 +105,20 @@ impl MetaPlugin for CwdMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn options(&self) -> &std::collections::HashMap { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/digest.rs b/src/meta_plugin/digest.rs index 3155dc1..a71de86 100644 --- a/src/meta_plugin/digest.rs +++ b/src/meta_plugin/digest.rs @@ -235,8 +235,10 @@ impl MetaPlugin for DigestMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn default_outputs(&self) -> Vec { @@ -251,8 +253,10 @@ impl MetaPlugin for DigestMetaPlugin { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } diff --git a/src/meta_plugin/env.rs b/src/meta_plugin/env.rs index d05d730..91f16e1 100644 --- a/src/meta_plugin/env.rs +++ b/src/meta_plugin/env.rs @@ -183,8 +183,10 @@ impl MetaPlugin for EnvMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names based on collected env vars. @@ -212,8 +214,10 @@ impl MetaPlugin for EnvMetaPlugin { /// # Panics /// /// Panics with "options_mut() not implemented for EnvMetaPlugin". - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/exec.rs b/src/meta_plugin/exec.rs index c4108c9..f8032b3 100644 --- a/src/meta_plugin/exec.rs +++ b/src/meta_plugin/exec.rs @@ -244,16 +244,20 @@ impl MetaPlugin for MetaPluginExec { &self.base.outputs } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - &mut self.base.outputs + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(&mut self.base.outputs) } fn options(&self) -> &std::collections::HashMap { &self.base.options } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - &mut self.base.options + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(&mut self.base.options) } fn default_outputs(&self) -> Vec { diff --git a/src/meta_plugin/hostname.rs b/src/meta_plugin/hostname.rs index 8aefcc1..d4e2f67 100644 --- a/src/meta_plugin/hostname.rs +++ b/src/meta_plugin/hostname.rs @@ -375,8 +375,10 @@ impl MetaPlugin for HostnameMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn default_outputs(&self) -> Vec { @@ -391,8 +393,10 @@ impl MetaPlugin for HostnameMetaPlugin { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/keep_pid.rs b/src/meta_plugin/keep_pid.rs index babd0a4..5a18f6f 100644 --- a/src/meta_plugin/keep_pid.rs +++ b/src/meta_plugin/keep_pid.rs @@ -162,8 +162,10 @@ impl MetaPlugin for KeepPidMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names for this plugin. @@ -189,8 +191,10 @@ impl MetaPlugin for KeepPidMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of options. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/magic.rs b/src/meta_plugin/magic.rs index 780d5eb..7f5a37f 100644 --- a/src/meta_plugin/magic.rs +++ b/src/meta_plugin/magic.rs @@ -21,16 +21,23 @@ impl MagicFileMetaPlugin { ) -> MagicFileMetaPlugin { // Start with default options let mut final_options = std::collections::HashMap::new(); - final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); + final_options.insert( + "max_buffer_size".to_string(), + serde_yaml::Value::Number(PIPESIZE.into()), + ); if let Some(opts) = options { for (key, value) in opts { final_options.insert(key, value); } } - + // Start with default outputs let mut final_outputs = std::collections::HashMap::new(); - let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]; + let default_outputs = vec![ + "mime_type".to_string(), + "mime_encoding".to_string(), + "file_type".to_string(), + ]; for output_name in default_outputs { final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); } @@ -39,20 +46,24 @@ impl MagicFileMetaPlugin { final_outputs.insert(key, value); } } - - let max_buffer_size = final_options.get("max_buffer_size") + + let max_buffer_size = final_options + .get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(PIPESIZE as u64) as usize; - + // Ensure the default max_buffer_size is in the options if !final_options.contains_key("max_buffer_size") { - final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); + final_options.insert( + "max_buffer_size".to_string(), + serde_yaml::Value::Number(PIPESIZE.into()), + ); } - + let mut base = crate::meta_plugin::BaseMetaPlugin::new(); base.outputs = final_outputs; base.options = final_options; - + MagicFileMetaPlugin { buffer: Vec::new(), max_buffer_size, @@ -61,24 +72,33 @@ impl MagicFileMetaPlugin { base, } } - - + fn get_magic_result(&self, flags: CookieFlags) -> io::Result { // Use the existing cookie and just change flags if let Some(cookie) = &self.cookie { - cookie.set_flags(flags) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; + cookie.set_flags(flags).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("Failed to set magic flags: {}", e), + ) + })?; - let result = cookie.buffer(&self.buffer) - .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; + let result = cookie.buffer(&self.buffer).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("Failed to analyze buffer: {}", e), + ) + })?; // Clean up the result - remove extra whitespace and take first part if needed let trimmed = result.trim(); - + // For some magic results, we might want just the first part before semicolon or comma let cleaned = if trimmed.contains(';') { trimmed.split(';').next().unwrap_or(trimmed).trim() - } else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { + } else if trimmed.contains(',') + && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) + { trimmed.split(',').next().unwrap_or(trimmed).trim() } else { trimmed @@ -86,36 +106,39 @@ impl MagicFileMetaPlugin { Ok(cleaned.to_string()) } else { - Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) + Err(io::Error::new( + io::ErrorKind::Other, + "Magic cookie not initialized", + )) } } /// Helper function to process all magic types and collect metadata fn process_magic_types(&self) -> Vec { let mut metadata = Vec::new(); - + // Define the types to process with their corresponding flags let types_to_process = [ ("mime_type", CookieFlags::MIME_TYPE), ("mime_encoding", CookieFlags::MIME_ENCODING), ("file_type", CookieFlags::default()), ]; - + for (name, flags) in types_to_process.iter() { if let Ok(result) = self.get_magic_result(*flags) { if !result.is_empty() { // Use process_metadata_outputs to handle output mapping if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - name, - serde_yaml::Value::String(result), - self.base.outputs() + name, + serde_yaml::Value::String(result), + self.base.outputs(), ) { metadata.push(meta_data); } } } } - + metadata } } @@ -129,7 +152,7 @@ impl MetaPlugin for MagicFileMetaPlugin { fn is_finalized(&self) -> bool { self.is_finalized } - + /// Sets the finalized state of the plugin. /// /// # Arguments @@ -138,7 +161,7 @@ impl MetaPlugin for MagicFileMetaPlugin { fn set_finalized(&mut self, finalized: bool) { self.is_finalized = finalized; } - + /// Initializes the magic cookie for file type detection. /// /// Loads the magic database; finalizes if initialization fails. @@ -206,9 +229,9 @@ impl MetaPlugin for MagicFileMetaPlugin { is_finalized: true, }; } - + let metadata = self.process_magic_types(); - + // Mark as finalized self.is_finalized = true; @@ -244,7 +267,7 @@ impl MetaPlugin for MagicFileMetaPlugin { is_finalized: true, }; } - + let mut metadata = Vec::new(); // Only collect up to max_buffer_size @@ -256,7 +279,7 @@ impl MetaPlugin for MagicFileMetaPlugin { // Check if we've reached our buffer limit and return metadata if self.buffer.len() >= self.max_buffer_size { metadata = self.process_magic_types(); - + // Mark as finalized when we've processed enough data self.is_finalized = true; } @@ -277,8 +300,7 @@ impl MetaPlugin for MagicFileMetaPlugin { fn meta_type(&self) -> MetaPluginType { MetaPluginType::MagicFile } - - + /// Returns a reference to the outputs mapping. /// /// # Returns @@ -287,26 +309,31 @@ impl MetaPlugin for MagicFileMetaPlugin { fn outputs(&self) -> &std::collections::HashMap { self.base.outputs() } - + /// Returns a mutable reference to the outputs mapping. /// /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } - + /// Returns the default output names for this plugin. /// /// # Returns /// /// Vector of default output field names. fn default_outputs(&self) -> Vec { - vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] + vec![ + "mime_type".to_string(), + "mime_encoding".to_string(), + "file_type".to_string(), + ] } - - + /// Returns a reference to the options mapping. /// /// # Returns @@ -315,14 +342,16 @@ impl MetaPlugin for MagicFileMetaPlugin { fn options(&self) -> &std::collections::HashMap { self.base.options() } - + /// Returns a mutable reference to the options mapping. /// /// # Returns /// /// A mutable reference to the `HashMap` of options. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } diff --git a/src/meta_plugin/magic_file.rs b/src/meta_plugin/magic_file.rs index 437c5d3..f75ddbe 100644 --- a/src/meta_plugin/magic_file.rs +++ b/src/meta_plugin/magic_file.rs @@ -187,8 +187,10 @@ impl MetaPlugin for MagicFileMetaPluginImpl { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn default_outputs(&self) -> Vec { @@ -203,11 +205,16 @@ impl MetaPlugin for MagicFileMetaPluginImpl { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } +#[cfg(feature = "magic")] +pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin; + #[cfg(not(feature = "magic"))] #[derive(Debug)] pub struct FallbackMagicFileMetaPlugin { @@ -222,21 +229,18 @@ impl FallbackMagicFileMetaPlugin { pub fn new( options: Option>, outputs: Option>, - ) -> FallbackMagicFileMetaPlugin { + ) -> Self { let mut base = BaseMetaPlugin::new(); - - // Set default outputs let default_outputs = &["mime_type", "mime_encoding", "file_type"]; base.initialize_plugin(default_outputs, &options, &outputs); - // Get max_buffer_size from options, default to PIPESIZE let max_buffer_size = base .options .get("max_buffer_size") .and_then(|v| v.as_u64()) .unwrap_or(crate::common::PIPESIZE as u64) as usize; - FallbackMagicFileMetaPlugin { + Self { buffer: Vec::new(), max_buffer_size, is_finalized: false, @@ -244,68 +248,75 @@ impl FallbackMagicFileMetaPlugin { } } - fn run_file_command(&self, buffer: &[u8]) -> io::Result { - let mut temp_file = tempfile::NamedTempFile::new()?; - temp_file.as_ref().write_all(buffer)?; - + fn run_file_command(&self, args: &[&str]) -> Option { let output = Command::new("file") - .arg("-b") - .arg("-m") - .arg("all") - .arg(temp_file.path()) - .output() - .map_err(|e| { - io::Error::new( - io::ErrorKind::Other, - format!("Failed to run file command: {}", e), - ) - })?; + .args(args) + .arg("-") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .and_then(|mut child| { + if let Some(mut stdin) = child.stdin.take() { + let _ = stdin.write_all(&self.buffer); + } + child.wait_with_output() + }); - if !output.status.success() { - return Err(io::Error::new(io::ErrorKind::Other, "File command failed")); - } - - let result = String::from_utf8_lossy(&output.stdout).trim().to_string(); - Ok(result) + output + .ok() + .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string()) } - fn process_file_output(&self, result: &str) -> Vec { + fn detect_type(&self) -> Vec { let mut metadata = Vec::new(); - // Parse the file command output - // file -m all output format is typically: type; charset=encoding - let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect(); - let file_type = parts.first().cloned().unwrap_or(result); - let mime_encoding = parts - .get(1) - .and_then(|s| s.strip_prefix("charset=")) - .cloned() - .unwrap_or(""); + // Get mime_type and mime_encoding via --mime + if let Some(mime_line) = self.run_file_command(&["--brief", "--mime"]) { + // Format: "text/plain; charset=us-ascii" + if let Some((mime_type, rest)) = mime_line.split_once(';') { + let mime_type = mime_type.trim().to_string(); + let mime_encoding = rest + .trim() + .strip_prefix("charset=") + .unwrap_or("binary") + .to_string(); - // For mime_type, try to infer from file type or use a heuristic - let mime_type = if file_type.starts_with("text") { - "text/plain" - } else if file_type.contains("ASCII") || file_type.contains("UTF-8") { - "text/plain" - } else if file_type.contains("empty") { - "application/octet-stream" - } else { - "application/octet-stream" // default - }; + if let Some(meta_data) = process_metadata_outputs( + "mime_type", + serde_yaml::Value::String(mime_type), + self.base.outputs(), + ) { + metadata.push(meta_data); + } + if let Some(meta_data) = process_metadata_outputs( + "mime_encoding", + serde_yaml::Value::String(mime_encoding), + self.base.outputs(), + ) { + metadata.push(meta_data); + } + } else { + // No charset, just mime type + if let Some(meta_data) = process_metadata_outputs( + "mime_type", + serde_yaml::Value::String(mime_line), + self.base.outputs(), + ) { + metadata.push(meta_data); + } + } + } - let outputs_to_process = [ - ("mime_type", mime_type), - ("mime_encoding", mime_encoding), - ("file_type", file_type), - ]; - - for (name, value) in outputs_to_process.iter() { - if let Some(meta_data) = process_metadata_outputs( - name, - serde_yaml::Value::String(value.to_string()), - self.base.outputs(), - ) { - metadata.push(meta_data); + // Get human-readable file type via --brief + if let Some(file_type) = self.run_file_command(&["--brief"]) { + if !file_type.is_empty() { + if let Some(meta_data) = process_metadata_outputs( + "file_type", + serde_yaml::Value::String(file_type), + self.base.outputs(), + ) { + metadata.push(meta_data); + } } } @@ -324,7 +335,6 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin { } fn initialize(&mut self) -> MetaPluginResponse { - // No initialization needed for fallback MetaPluginResponse { metadata: Vec::new(), is_finalized: false, @@ -339,27 +349,18 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin { }; } - let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); - if remaining_capacity > 0 { - let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); - self.buffer.extend_from_slice(&data[..bytes_to_copy]); + let remaining = self.max_buffer_size.saturating_sub(self.buffer.len()); + if remaining > 0 { + let n = std::cmp::min(data.len(), remaining); + self.buffer.extend_from_slice(&data[..n]); if self.buffer.len() >= self.max_buffer_size { - if let Ok(result) = self.run_file_command(&self.buffer) { - let metadata = self.process_file_output(&result); - self.is_finalized = true; - return MetaPluginResponse { - metadata, - is_finalized: true, - }; - } else { - // On error, finalize with empty metadata - self.is_finalized = true; - return MetaPluginResponse { - metadata: Vec::new(), - is_finalized: true, - }; - } + let metadata = self.detect_type(); + self.is_finalized = true; + return MetaPluginResponse { + metadata, + is_finalized: true, + }; } } @@ -376,21 +377,9 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin { is_finalized: true, }; } - - let metadata = if !self.buffer.is_empty() { - if let Ok(result) = self.run_file_command(&self.buffer) { - self.process_file_output(&result) - } else { - Vec::new() - } - } else { - Vec::new() - }; - self.is_finalized = true; - MetaPluginResponse { - metadata, + metadata: self.detect_type(), is_finalized: true, } } @@ -403,8 +392,10 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn default_outputs(&self) -> Vec { @@ -419,14 +410,13 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } -#[cfg(feature = "magic")] -pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin; - #[cfg(not(feature = "magic"))] pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin; diff --git a/src/meta_plugin/mod.rs b/src/meta_plugin/mod.rs index 676bde3..03ecf8b 100644 --- a/src/meta_plugin/mod.rs +++ b/src/meta_plugin/mod.rs @@ -10,7 +10,6 @@ pub mod env; pub mod exec; pub mod hostname; pub mod keep_pid; -#[cfg(feature = "magic")] pub mod magic_file; pub mod read_rate; pub mod read_time; @@ -179,8 +178,10 @@ impl MetaPlugin for BaseMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - &mut self.outputs + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(&mut self.outputs) } /// Returns a reference to the options mapping. @@ -197,8 +198,10 @@ impl MetaPlugin for BaseMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of options. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - &mut self.options + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(&mut self.options) } } @@ -424,11 +427,17 @@ where /// Returns a mutable reference to the outputs mapping. /// - /// # Panics + /// # Returns /// - /// Panics with "outputs_mut() not implemented for this plugin". - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - panic!("outputs_mut() not implemented for this plugin") + /// A mutable reference to the outputs `HashMap`. + /// + /// # Errors + /// + /// Returns an error if the plugin does not support mutable outputs. + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + anyhow::bail!("outputs_mut() not supported by this plugin") } /// Returns a reference to the options mapping. @@ -445,11 +454,17 @@ where /// Returns a mutable reference to the options mapping. /// - /// # Panics + /// # Returns /// - /// Panics with "options_mut() not implemented for this plugin". - fn options_mut(&mut self) -> &mut std::collections::HashMap { - panic!("options_mut() not implemented for this plugin") + /// A mutable reference to the options `HashMap`. + /// + /// # Errors + /// + /// Returns an error if the plugin does not support mutable options. + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + anyhow::bail!("options_mut() not supported by this plugin") } /// Gets the default output names this plugin can produce. @@ -496,12 +511,11 @@ pub fn get_meta_plugin( meta_plugin_type: MetaPluginType, options: Option>, outputs: Option>, -) -> Box { +) -> anyhow::Result> { let registry = META_PLUGIN_REGISTRY.lock().unwrap(); if let Some(constructor) = registry.get(&meta_plugin_type) { - return constructor(options, outputs); + return Ok(constructor(options, outputs)); } - // Fallback for unknown plugins - panic!("Meta plugin {meta_plugin_type:?} not registered"); + anyhow::bail!("Meta plugin {meta_plugin_type:?} not registered") } diff --git a/src/meta_plugin/read_rate.rs b/src/meta_plugin/read_rate.rs index 519482b..265ae23 100644 --- a/src/meta_plugin/read_rate.rs +++ b/src/meta_plugin/read_rate.rs @@ -193,8 +193,10 @@ impl MetaPlugin for ReadRateMetaPlugin { /// # Returns /// /// Mutable reference to the outputs HashMap. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names for this plugin. @@ -222,8 +224,10 @@ impl MetaPlugin for ReadRateMetaPlugin { /// # Returns /// /// Mutable reference to the options HashMap. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/read_time.rs b/src/meta_plugin/read_time.rs index aef6da2..f01832e 100644 --- a/src/meta_plugin/read_time.rs +++ b/src/meta_plugin/read_time.rs @@ -97,8 +97,10 @@ impl MetaPlugin for ReadTimeMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn default_outputs(&self) -> Vec { @@ -109,8 +111,10 @@ impl MetaPlugin for ReadTimeMetaPlugin { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/shell.rs b/src/meta_plugin/shell.rs index 7ea8d26..cb84070 100644 --- a/src/meta_plugin/shell.rs +++ b/src/meta_plugin/shell.rs @@ -194,8 +194,10 @@ impl MetaPlugin for ShellMetaPlugin { /// # Returns /// /// * `&mut HashMap` - Mutable outputs map. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names for this plugin. @@ -221,8 +223,10 @@ impl MetaPlugin for ShellMetaPlugin { /// # Returns /// /// * `&mut HashMap` - Mutable options map. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } /// Registers the shell meta plugin with the global registry. diff --git a/src/meta_plugin/shell_pid.rs b/src/meta_plugin/shell_pid.rs index 5d1ad2c..0c98621 100644 --- a/src/meta_plugin/shell_pid.rs +++ b/src/meta_plugin/shell_pid.rs @@ -109,16 +109,20 @@ impl MetaPlugin for ShellPidMetaPlugin { self.base.outputs() } - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } fn options(&self) -> &std::collections::HashMap { self.base.options() } - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index d13ce27..395a235 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -769,8 +769,10 @@ impl MetaPlugin for TextMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names for this plugin. @@ -803,8 +805,10 @@ impl MetaPlugin for TextMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/meta_plugin/user.rs b/src/meta_plugin/user.rs index 7cfcbae..0bb529e 100644 --- a/src/meta_plugin/user.rs +++ b/src/meta_plugin/user.rs @@ -119,8 +119,10 @@ impl MetaPlugin for UserMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of outputs. - fn outputs_mut(&mut self) -> &mut std::collections::HashMap { - self.base.outputs_mut() + fn outputs_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.outputs_mut()) } /// Returns the default output names. @@ -151,8 +153,10 @@ impl MetaPlugin for UserMetaPlugin { /// # Returns /// /// A mutable reference to the `HashMap` of options. - fn options_mut(&mut self) -> &mut std::collections::HashMap { - self.base.options_mut() + fn options_mut( + &mut self, + ) -> anyhow::Result<&mut std::collections::HashMap> { + Ok(self.base.options_mut()) } } use crate::meta_plugin::register_meta_plugin; diff --git a/src/modes/common.rs b/src/modes/common.rs index 98b08bf..7363627 100644 --- a/src/modes/common.rs +++ b/src/modes/common.rs @@ -206,12 +206,14 @@ pub fn settings_meta_plugin_types( // Try to find the MetaPluginType by meta name let mut found = false; for meta_plugin_type in MetaPluginType::iter() { - let meta_plugin = - crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None); - if meta_plugin.meta_type().to_string() == trimmed_name { - meta_plugin_types.push(meta_plugin_type); - found = true; - break; + if let Ok(meta_plugin) = + crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None) + { + if meta_plugin.meta_type().to_string() == trimmed_name { + meta_plugin_types.push(meta_plugin_type); + found = true; + break; + } } } diff --git a/src/modes/generate_config.rs b/src/modes/generate_config.rs index d041f23..c06a795 100644 --- a/src/modes/generate_config.rs +++ b/src/modes/generate_config.rs @@ -53,6 +53,7 @@ struct ServerConfig { password_file: Option, password: Option, password_hash: Option, + cors_origin: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -145,6 +146,7 @@ pub fn mode_generate_config(_cmd: &mut Command, _settings: &crate::config::Setti password_file: None, password: None, password_hash: None, + cors_origin: None, }), compression_plugin: None, meta_plugins: Some(vec![ diff --git a/src/modes/get.rs b/src/modes/get.rs index 0c6c8da..63fa48e 100644 --- a/src/modes/get.rs +++ b/src/modes/get.rs @@ -1,4 +1,4 @@ -use anyhow::{Result, anyhow}; +use anyhow::{Context, Result, anyhow}; use std::io::Write; use crate::common::PIPESIZE; @@ -55,7 +55,7 @@ pub fn mode_get( .find_item(conn, ids, tags, &std::collections::HashMap::new()) .map_err(|e| anyhow!("Unable to find matching item in database: {}", e))?; - let item_id = item_with_meta.item.id.unwrap(); + let item_id = item_with_meta.item.id.context("Item missing ID")?; // Determine if we should detect binary data let mut detect_binary = !settings.force && std::io::stdout().is_terminal(); diff --git a/src/modes/info.rs b/src/modes/info.rs index bea41d8..5692a42 100644 --- a/src/modes/info.rs +++ b/src/modes/info.rs @@ -1,7 +1,7 @@ use crate::config; use crate::modes::common::{OutputFormat, format_size}; use crate::services::types::ItemWithMeta; -use anyhow::{Result, anyhow}; +use anyhow::{Context, Result, anyhow}; use clap::Command; use clap::error::ErrorKind; use serde::{Deserialize, Serialize}; @@ -141,7 +141,7 @@ fn show_item( } let item = item_with_meta.item; - let item_id = item.id.unwrap(); + let item_id = item.id.context("Item missing ID")?; let item_tags: Vec = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let mut table = crate::modes::common::create_table(false); @@ -249,7 +249,7 @@ fn show_item_structured( let item_tags: Vec = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let meta_map = item_with_meta.meta_as_map(); let item = item_with_meta.item; - let item_id = item.id.unwrap(); + let item_id = item.id.context("Item missing ID")?; let mut item_path_buf = data_path.clone(); item_path_buf.push(item_id.to_string()); diff --git a/src/modes/list.rs b/src/modes/list.rs index bb03fb1..69b9158 100644 --- a/src/modes/list.rs +++ b/src/modes/list.rs @@ -8,7 +8,7 @@ use crate::modes::common::ColumnType; use crate::modes::common::{OutputFormat, format_size}; use crate::services::item_service::ItemService; use crate::services::types::ItemWithMeta; -use anyhow::Result; +use anyhow::{Context, Result}; use comfy_table::CellAlignment; use comfy_table::{Attribute, Cell, Color, Row}; use serde::{Deserialize, Serialize}; @@ -202,7 +202,7 @@ pub fn mode_list( let item = item_with_meta.item; let mut item_path = data_path.clone(); - item_path.push(item.id.unwrap().to_string()); + item_path.push(item.id.context("Item missing ID")?.to_string()); let mut table_row = Row::new(); @@ -210,7 +210,7 @@ pub fn mode_list( let column_type = column .name .parse::() - .unwrap_or_else(|_| panic!("Unknown column {:?}", column.name)); + .with_context(|| format!("Unknown column type {:?} in list format", column.name))?; let mut meta_name: Option<&str> = None; @@ -343,7 +343,7 @@ fn show_list_structured( let tags: Vec = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let meta = item_with_meta.meta_as_map(); let item = item_with_meta.item; - let item_id = item.id.unwrap(); + let item_id = item.id.context("Item missing ID")?; let mut item_path = data_path.clone(); item_path.push(item_id.to_string()); diff --git a/src/modes/server/api/item.rs b/src/modes/server/api/item.rs index 5169158..5564fe0 100644 --- a/src/modes/server/api/item.rs +++ b/src/modes/server/api/item.rs @@ -481,7 +481,10 @@ pub async fn handle_post_item( let metadata = item_with_meta.meta_as_map(); let item_info = ItemInfo { - id: item_with_meta.item.id.unwrap(), + id: item_with_meta.item.id.ok_or_else(|| { + warn!("Item missing ID"); + StatusCode::INTERNAL_SERVER_ERROR + })?, ts: item_with_meta.item.ts.to_rfc3339(), size: item_with_meta.item.size, compression, @@ -542,7 +545,10 @@ pub async fn handle_get_item_latest_content( match item_with_meta { Ok(item) => { - let item_id = item.item.id.unwrap(); + let item_id = item.item.id.ok_or_else(|| { + warn!("Item missing ID"); + StatusCode::INTERNAL_SERVER_ERROR + })?; let metadata = item.meta_as_map(); // Handle as_meta parameter if params.as_meta { @@ -948,7 +954,10 @@ pub async fn handle_delete_item( .map_err(handle_item_error)?; let item_info = ItemInfo { - id: deleted_item.id.unwrap(), + id: deleted_item.id.ok_or_else(|| { + warn!("Item missing ID"); + StatusCode::INTERNAL_SERVER_ERROR + })?, ts: deleted_item.ts.to_rfc3339(), size: deleted_item.size, compression: deleted_item.compression, @@ -1001,7 +1010,10 @@ pub async fn handle_get_item_info( let metadata = item_with_meta.meta_as_map(); let item_info = ItemInfo { - id: item_with_meta.item.id.unwrap(), + id: item_with_meta.item.id.ok_or_else(|| { + warn!("Item missing ID"); + StatusCode::INTERNAL_SERVER_ERROR + })?, ts: item_with_meta.item.ts.to_rfc3339(), size: item_with_meta.item.size, compression: item_with_meta.item.compression.clone(), diff --git a/src/modes/server/common.rs b/src/modes/server/common.rs index 77d0c84..fb31935 100644 --- a/src/modes/server/common.rs +++ b/src/modes/server/common.rs @@ -27,6 +27,7 @@ use std::net::SocketAddr; use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; +use subtle::ConstantTimeEq; use tokio::sync::Mutex; use utoipa::ToSchema; @@ -75,6 +76,12 @@ pub struct ServerConfig { /// /// When both cert_file and key_file are set, the server uses HTTPS. pub key_file: Option, + /// Optional CORS allowed origin. + /// + /// When set, cross-origin requests are restricted to this origin. + /// Defaults to "http://localhost" if not specified. Use "*" to allow + /// all origins (not recommended for production). + pub cors_origin: Option, } /// Application state shared across all routes. @@ -661,8 +668,11 @@ fn check_bearer_auth( return pwhash::unix::verify(provided_password, hash); } - // Otherwise, do direct comparison - provided_password == expected_password + // Otherwise, do constant-time comparison to prevent timing attacks + provided_password + .as_bytes() + .ct_eq(expected_password.as_bytes()) + .into() } /// Validates basic authentication credentials. @@ -704,9 +714,12 @@ fn check_basic_auth( return pwhash::unix::verify(provided_password, hash); } - // Otherwise, do direct comparison + // Otherwise, do constant-time comparison to prevent timing attacks let expected_credentials = format!("keep:{expected_password}"); - return decoded_str == expected_credentials; + return decoded_str + .as_bytes() + .ct_eq(expected_credentials.as_bytes()) + .into(); } } } diff --git a/src/modes/server/mod.rs b/src/modes/server/mod.rs index 416a25b..5accb25 100644 --- a/src/modes/server/mod.rs +++ b/src/modes/server/mod.rs @@ -54,6 +54,7 @@ pub fn mode_server( password_hash: settings.server_password_hash(), cert_file: settings.server_cert_file(), key_file: settings.server_key_file(), + cors_origin: settings.server_cors_origin(), }; // Create ItemService once @@ -122,6 +123,31 @@ async fn run_server( create_auth_middleware(config.password.clone(), config.password_hash.clone()), )); + // Build CORS layer - restricted by default, configurable via cors_origin setting + let cors_origin = config.cors_origin.as_deref().unwrap_or("http://localhost"); + let cors_layer = if cors_origin == "*" { + CorsLayer::permissive() + } else { + CorsLayer::new() + .allow_origin( + cors_origin + .parse::() + .unwrap_or_else(|_| { + log::warn!( + "Invalid CORS origin '{cors_origin}', defaulting to http://localhost" + ); + "http://localhost".parse().unwrap() + }), + ) + .allow_methods([ + axum::http::Method::GET, + axum::http::Method::POST, + axum::http::Method::PUT, + axum::http::Method::DELETE, + ]) + .allow_headers(tower_http::cors::Any) + }; + // Create the app with documentation routes open and others protected let app = Router::new() // Add documentation routes without authentication @@ -135,11 +161,25 @@ async fn run_server( .layer( ServiceBuilder::new() .layer(TraceLayer::new_for_http()) - .layer(CorsLayer::permissive()), + .layer(cors_layer), ); let addr: SocketAddr = bind_address.parse()?; + // Warn if password auth is enabled without TLS + if config.password.is_some() || config.password_hash.is_some() { + #[cfg(not(feature = "tls"))] + log::warn!( + "SECURITY: Password authentication enabled but TLS support is not compiled in. Password will be transmitted in plain text!" + ); + #[cfg(feature = "tls")] + if config.cert_file.is_none() || config.key_file.is_none() { + log::warn!( + "SECURITY: Password authentication enabled but TLS is not configured. Password will be transmitted in plain text!" + ); + } + } + // Build the app into a service let service = app.into_make_service_with_connect_info::(); diff --git a/src/modes/status.rs b/src/modes/status.rs index f1931ad..cf3f1e1 100644 --- a/src/modes/status.rs +++ b/src/modes/status.rs @@ -78,7 +78,9 @@ fn build_meta_plugins_configured_table(status_info: &StatusInfo) -> Option Option
= default_plugin.options().iter().collect(); diff --git a/src/parser/filter.pest b/src/parser/filter.pest deleted file mode 100644 index 3333e20..0000000 --- a/src/parser/filter.pest +++ /dev/null @@ -1,30 +0,0 @@ -WHITESPACE = _{ " " | "\t" | "\n" | "\r" } - -filters = { filter ~ ("," ~ filters)? } -filter = { filter_name ~ ("(" ~ options ~ ")")? } -filter_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* } - -options = { option ~ ("," ~ options)? } -option = { (option_name ~ "=")? ~ option_value } -option_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* } - -option_value = { - JSON_NUMBER | - JSON_STRING | - JSON_BOOLEAN -} - -JSON_NUMBER = @{ - ("-")? ~ - ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ - ("." ~ ASCII_DIGIT*)? ~ - (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)? -} - -JSON_STRING = ${ - "\"" ~ - (("\\" ~ ANY) | (!("\"" | "\\") ~ ANY))* ~ - "\"" -} - -JSON_BOOLEAN = ${ "true" | "false" } diff --git a/src/parser/filter_parser.rs b/src/parser/filter_parser.rs deleted file mode 100644 index 11a949f..0000000 --- a/src/parser/filter_parser.rs +++ /dev/null @@ -1,119 +0,0 @@ -use pest::Parser; -use pest_derive::Parser; -use std::collections::HashMap; -use serde_json; - -#[derive(Parser)] -#[grammar = "filter.pest"] -pub struct FilterParser; - -#[derive(Debug)] -pub struct Filter { - pub name: String, - pub options: HashMap, -} - -pub fn parse_filter_string(input: &str) -> Result, Box> { - let mut filters = Vec::new(); - let pairs = FilterParser::parse(::Rule::filters, input)?; - - for pair in pairs { - if pair.as_rule() == ::Rule::filter { - let mut name = String::new(); - let mut options = HashMap::new(); - - for inner_pair in pair.into_inner() { - match inner_pair.as_rule() { - ::Rule::filter_name => { - name = inner_pair.as_str().to_string(); - } - ::Rule::options => { - for option_pair in inner_pair.into_inner() { - if option_pair.as_rule() == ::Rule::option { - let mut option_name = None; - let mut option_value = None; - - for option_inner in option_pair.into_inner() { - match option_inner.as_rule() { - ::Rule::option_name => { - option_name = Some(option_inner.as_str().to_string()); - } - ::Rule::option_value => { - option_value = Some(parse_option_value(option_inner.as_str())?); - } - _ => {} - } - } - - if let Some(value) = option_value { - // If no name is provided, use the filter name as the key - let key = option_name.unwrap_or_else(|| name.clone()); - options.insert(key, value); - } - } - } - } - _ => {} - } - } - - filters.push(Filter { name, options }); - } - } - - Ok(filters) -} - -fn parse_option_value(input: &str) -> Result> { - serde_json::from_str(input).map_err(|e| Box::new(e) as Box) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_simple_filter() { - let result = parse_filter_string("grep").unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "grep"); - assert!(result[0].options.is_empty()); - } - - #[test] - fn test_parse_filter_with_options() { - let result = parse_filter_string("head_lines(10)").unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "head_lines"); - assert_eq!(result[0].options.len(), 1); - if let serde_json::Value::Number(n) = result[0].options.get("head_lines").unwrap() { - assert_eq!(n.as_i64(), Some(10)); - } else { - panic!("Expected number"); - } - } - - #[test] - fn test_parse_filter_with_named_options() { - let result = parse_filter_string(r#"grep(pattern="error")"#).unwrap(); - assert_eq!(result.len(), 1); - assert_eq!(result[0].name, "grep"); - assert_eq!(result[0].options.get("pattern").unwrap().as_str(), Some("error")); - } - - #[test] - fn test_parse_multiple_filters() { - let result = parse_filter_string(r#"head_lines(10),grep(pattern="error")"#).unwrap(); - assert_eq!(result.len(), 2); - assert_eq!(result[0].name, "head_lines"); - assert_eq!(result[0].options.len(), 1); - if let serde_json::Value::Number(n) = result[0].options.get("head_lines").unwrap() { - assert_eq!(n.as_i64(), Some(10)); - } else { - panic!("Expected number"); - } - assert_eq!(result[1].name, "grep"); - assert_eq!(result[1].options.len(), 1); - assert_eq!(result[1].options.get("pattern").unwrap().as_str(), Some("error")); - } -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs deleted file mode 100644 index aea4897..0000000 --- a/src/parser/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -/// Parsing utilities for filters and other inputs. -/// -/// This module provides tools for parsing filter strings and other structured -/// inputs used throughout the application. Currently, it includes a pest-based -/// parser for filter expressions. -/// -/// # Examples -/// -/// ``` -/// use keep::parser::parse_filter_string; -/// let filters = parse_filter_string("head:5|grep:hello").unwrap(); -/// ``` -pub mod filter_parser; - -pub use filter_parser::{FilterParser, parse_filter_string}; diff --git a/src/services/async_item_service.rs b/src/services/async_item_service.rs index 47d10bb..a0c1cae 100644 --- a/src/services/async_item_service.rs +++ b/src/services/async_item_service.rs @@ -155,11 +155,10 @@ impl AsyncItemService { .map(|s| s.to_string()) .unwrap_or_else(|| "application/octet-stream".to_string()); - let is_binary = if let Some(text_val) = metadata.get("text") { - text_val == "false" - } else { - crate::common::is_binary::is_binary(&content_clone) - }; + let is_binary = crate::common::is_binary::is_content_binary_from_metadata( + &metadata, + &content_clone, + ); Ok::<_, CoreError>((mime_type, is_binary)) }) diff --git a/src/services/item_service.rs b/src/services/item_service.rs index fb5fc10..0176ab0 100644 --- a/src/services/item_service.rs +++ b/src/services/item_service.rs @@ -234,18 +234,14 @@ impl ItemService { compression: &str, metadata: &HashMap, ) -> Result { - // Check if we already have text metadata - if let Some(text_val) = metadata.get("text") { - return Ok(text_val == "false"); - } - // Read only the first 8192 bytes for binary detection let mut sample_reader = self .compression_service .stream_item_content(item_path, compression)?; let mut sample_buffer = vec![0; 8192]; let bytes_read = sample_reader.read(&mut sample_buffer)?; - Ok(crate::common::is_binary::is_binary( + Ok(crate::common::is_binary::is_content_binary_from_metadata( + metadata, &sample_buffer[..bytes_read], )) } @@ -516,7 +512,9 @@ impl ItemService { let mut result = Vec::new(); for item in items { - let item_id = item.id.unwrap(); + let item_id = item + .id + .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?; let tags = tags_map.get(&item_id).cloned().unwrap_or_default(); let meta_hm = meta_map_db.get(&item_id).cloned().unwrap_or_default(); let meta = meta_hm @@ -636,7 +634,9 @@ impl ItemService { let mut item; { item = db::create_item(conn, compression_type.clone())?; - item_id = item.id.unwrap(); + item_id = item + .id + .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?; debug!("ITEM_SERVICE: Created new item with id: {item_id}"); db::set_item_tags(conn, item.clone(), tags)?; debug!("ITEM_SERVICE: Set tags for item {item_id}"); @@ -770,7 +770,9 @@ impl ItemService { { item = db::create_item(conn, compression_type.clone())?; - item_id = item.id.unwrap(); + item_id = item + .id + .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?; debug!("ITEM_SERVICE: Created MCP item with id: {item_id}"); // Add tags diff --git a/src/services/meta_service.rs b/src/services/meta_service.rs index 8c007ff..44ff8a9 100644 --- a/src/services/meta_service.rs +++ b/src/services/meta_service.rs @@ -21,7 +21,7 @@ impl MetaService { // Create plugins with their configuration let meta_plugins: Vec> = meta_plugin_types .iter() - .map(|meta_plugin_type| { + .filter_map(|meta_plugin_type| { debug!("META_SERVICE: Creating plugin: {meta_plugin_type:?}"); // Get the plugin name using strum's Display implementation @@ -52,7 +52,13 @@ impl MetaService { (None, None) }; - crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), options, outputs) + match crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), options, outputs) { + Ok(plugin) => Some(plugin), + Err(e) => { + log::warn!("META_SERVICE: Failed to create plugin {meta_plugin_type:?}: {e}, skipping"); + None + } + } }) .collect(); diff --git a/src/services/sync_data_service.rs b/src/services/sync_data_service.rs index c52c2b8..ebb36d8 100644 --- a/src/services/sync_data_service.rs +++ b/src/services/sync_data_service.rs @@ -73,7 +73,9 @@ impl SyncDataService { reader.read_to_end(&mut content)?; let item = self.save_item(&*content, &mut cmd, settings, &mut tags, conn)?; - let item_id = item.id.unwrap(); + let item_id = item + .id + .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?; // Set metadata for (key, value) in metadata { @@ -109,85 +111,8 @@ impl SyncDataService { compress: bool, run_meta: bool, ) -> Result { - let mut cmd = Command::new("keep"); - let settings = &self.settings; - let mut tags = tags; - - if tags.is_empty() { - tags.push("none".to_string()); - } - - let compression_type = if compress { - settings_compression_type(&mut cmd, settings) - } else { - CompressionType::None - }; - - let compression_engine = get_compression_engine(compression_type.clone())?; - - let item_id; - let mut item; - { - item = crate::db::create_item(conn, compression_type.clone())?; - item_id = item.id.unwrap(); - crate::db::set_item_tags(conn, item.clone(), &tags)?; - } - - // Initialize meta plugins if requested - let meta_service = MetaService::new(); - let mut plugins = if run_meta { - meta_service.get_plugins(&mut cmd, settings) - } else { - Vec::new() - }; - - if run_meta { - meta_service.initialize_plugins(&mut plugins, conn, item_id); - } - - // Write content to file - let mut item_path = self.item_service.get_data_path().clone(); - item_path.push(item_id.to_string()); - - let mut item_out = compression_engine.create(item_path)?; - - let mut total_bytes = 0i64; - const PIPESIZE: usize = 65536; - - if run_meta && !plugins.is_empty() { - // Process in chunks for meta plugins - let mut offset = 0; - while offset < content.len() { - let end = std::cmp::min(offset + PIPESIZE, content.len()); - let chunk = &content[offset..end]; - item_out.write_all(chunk)?; - total_bytes += chunk.len() as i64; - meta_service.process_chunk(&mut plugins, chunk, conn, item_id); - offset = end; - } - } else { - // Write all at once, no meta processing - item_out.write_all(content)?; - total_bytes = content.len() as i64; - } - - item_out.flush()?; - drop(item_out); - - // Finalize meta plugins - if run_meta { - meta_service.finalize_plugins(&mut plugins, conn, item_id); - } - - // Add client-provided metadata - for (key, value) in &metadata { - crate::db::add_meta(conn, item_id, key, value)?; - } - - item.size = Some(total_bytes); - crate::db::update_item(conn, item)?; - - self.get_item(conn, item_id) + let mut cursor = Cursor::new(content); + self.save_item_raw_streaming(conn, &mut cursor, tags, metadata, compress, run_meta) } /// Save an item from a streaming reader with granular control over compression. @@ -224,7 +149,9 @@ impl SyncDataService { let mut item; { item = crate::db::create_item(conn, compression_type.clone())?; - item_id = item.id.unwrap(); + item_id = item + .id + .ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?; crate::db::set_item_tags(conn, item.clone(), &tags)?; } @@ -246,7 +173,7 @@ impl SyncDataService { let mut item_out = compression_engine.create(item_path)?; - let mut buffer = [0u8; 65536]; + let mut buffer = [0u8; crate::common::PIPESIZE]; let mut total_bytes = 0i64; loop {