fix: harden security, eliminate panics, remove dead code, add Dockerfile

Security:
- Use constant-time password comparison (subtle crate) to prevent timing attacks
- Replace permissive CORS with configurable origin-restricted CORS
- Add TLS warning when password auth is used without HTTPS

Bug fixes:
- Convert MetaPlugin panics to anyhow::Result (get_meta_plugin, outputs_mut, options_mut)
- Replace item.id.unwrap() with proper error handling across 15 call sites
- Fix panic on unknown column type in list mode
- Fix conflicting PIPESIZE constant (was 8192 vs 65536, now unified to 8192)
- Add 256MB filter chain buffer limit to prevent OOM
- Gracefully skip unregistered plugins instead of panicking

Dead code removal:
- Delete unused filter parser files (filter_parser.rs, filter.pest, parser/ module)
- ~260 lines of dead PEG parser code removed

Code consolidation:
- Add is_content_binary_from_metadata() helper (was duplicated in 4 places)
- Simplify save_item_raw() to delegate to save_item_raw_streaming() (~90 lines removed)

Incomplete features:
- Populate filter_plugins in status output from global registry
- Add FallbackMagicFileMetaPlugin (was referenced but never implemented)
- Document init_plugins() as intentional no-op

Infrastructure:
- Add Dockerfile (static musl binary on scratch, 4.8MB)
- Add .dockerignore
- Add cors_origin to ServerConfig and config.rs
This commit is contained in:
2026-03-13 07:57:36 -03:00
parent bee980605f
commit b166477202
43 changed files with 561 additions and 687 deletions

5
.dockerignore Normal file
View File

@@ -0,0 +1,5 @@
target/
.git/
*.db
keep.db
bin/

1
Cargo.lock generated
View File

@@ -1575,6 +1575,7 @@ dependencies = [
"stderrlog", "stderrlog",
"strip-ansi-escapes", "strip-ansi-escapes",
"strum", "strum",
"subtle",
"tempfile", "tempfile",
"term", "term",
"thiserror 1.0.69", "thiserror 1.0.69",

View File

@@ -52,6 +52,7 @@ serde_json = "1.0.142"
serde_yaml = "0.9.34" serde_yaml = "0.9.34"
sha2 = "0.10.0" sha2 = "0.10.0"
md5 = "0.7.0" md5 = "0.7.0"
subtle = "2.6"
stderrlog = "0.6.0" stderrlog = "0.6.0"
strum = { version = "0.27.2", features = ["derive"] } strum = { version = "0.27.2", features = ["derive"] }
term = "1.1.0" term = "1.1.0"

36
Dockerfile Normal file
View File

@@ -0,0 +1,36 @@
# Build stage
FROM rust:1.88-slim AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
make \
gcc \
musl-tools \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
RUN rustup target add x86_64-unknown-linux-musl
WORKDIR /app
# Copy manifests and fetch dependencies (cached layer)
COPY Cargo.toml Cargo.lock ./
RUN mkdir src && echo 'fn main() {}' > src/main.rs && echo '' > src/lib.rs
RUN cargo fetch --target x86_64-unknown-linux-musl
# Copy real source and build static binary
# magic feature excluded (requires shared libmagic; fallback uses `file` command)
COPY src/ src/
RUN cargo build --release --target x86_64-unknown-linux-musl \
--no-default-features --features lz4,gzip \
&& strip target/x86_64-unknown-linux-musl/release/keep
# Runtime stage - scratch since binary is fully static
FROM scratch
COPY --from=builder /app/target/x86_64-unknown-linux-musl/release/keep /keep
EXPOSE 21080
ENTRYPOINT ["/keep"]

View File

@@ -229,3 +229,25 @@ fn calculate_printable_ratio(data: &[u8]) -> f64 {
printable_count as f64 / data.len() as f64 printable_count as f64 / data.len() as f64
} }
/// Check if content is binary, using metadata as a fast path.
///
/// First checks for a "text" metadata field:
/// - "false" means binary
/// - "true" means text
/// - Absent or other values fall back to byte sampling
///
/// # Arguments
///
/// * `metadata` - Key-value metadata map (e.g., from `meta_as_map()`)
/// * `data` - Byte sample to analyze if metadata is inconclusive
pub fn is_content_binary_from_metadata(
metadata: &std::collections::HashMap<String, String>,
data: &[u8],
) -> bool {
if let Some(text_val) = metadata.get("text") {
text_val == "false"
} else {
is_binary(data)
}
}

View File

@@ -134,9 +134,16 @@ pub fn generate_status_info(
for meta_plugin_type in sorted_meta_plugins { for meta_plugin_type in sorted_meta_plugins {
log::debug!("STATUS: Processing meta plugin type: {meta_plugin_type:?}"); log::debug!("STATUS: Processing meta plugin type: {meta_plugin_type:?}");
log::debug!("STATUS: About to call get_meta_plugin"); let meta_plugin =
let meta_plugin = crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None); match crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None) {
log::debug!("STATUS: Created meta plugin instance"); Ok(p) => p,
Err(e) => {
log::warn!(
"STATUS: Skipping unregistered meta plugin {meta_plugin_type:?}: {e}"
);
continue;
}
};
// Get meta name first to avoid borrowing issues // Get meta name first to avoid borrowing issues
log::debug!("STATUS: Getting meta name..."); log::debug!("STATUS: Getting meta name...");
@@ -175,12 +182,26 @@ pub fn generate_status_info(
); );
} }
// Populate filter plugin info from the global registry
let filter_plugins_map = crate::services::filter_service::get_available_filter_plugins();
let filter_plugins_info: Vec<FilterPluginInfo> = filter_plugins_map
.into_iter()
.map(|(name, creator)| {
let plugin = creator();
FilterPluginInfo {
name: name.clone(),
options: plugin.options(),
description: format!("{name} filter plugin"),
}
})
.collect();
StatusInfo { StatusInfo {
paths: path_info, paths: path_info,
compression: compression_info, compression: compression_info,
meta_plugins: meta_plugins_map, meta_plugins: meta_plugins_map,
enabled_meta_plugins: enabled_meta_plugins_vec, enabled_meta_plugins: enabled_meta_plugins_vec,
filter_plugins: Vec::new(), filter_plugins: filter_plugins_info,
configured_meta_plugins: None, configured_meta_plugins: None,
} }
} }

View File

@@ -148,6 +148,7 @@ pub struct ServerConfig {
pub password_hash: Option<String>, pub password_hash: Option<String>,
pub cert_file: Option<PathBuf>, pub cert_file: Option<PathBuf>,
pub key_file: Option<PathBuf>, pub key_file: Option<PathBuf>,
pub cors_origin: Option<String>,
} }
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -502,6 +503,10 @@ impl Settings {
self.server.as_ref().and_then(|s| s.key_file.clone()) self.server.as_ref().and_then(|s| s.key_file.clone())
} }
pub fn server_cors_origin(&self) -> Option<String> {
self.server.as_ref().and_then(|s| s.cors_origin.clone())
}
pub fn compression(&self) -> Option<String> { pub fn compression(&self) -> Option<String> {
self.compression_plugin.as_ref().map(|c| c.name.clone()) self.compression_plugin.as_ref().map(|c| c.name.clone())
} }

View File

@@ -1,47 +0,0 @@
# This Pest grammar defines the syntax for filter chains used in the Keep application.
# Filters can be chained with commas and may have named or unnamed options with JSON-like values.
WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
# Top-level rule for parsing multiple filters separated by commas.
filters = { filter ~ ("," ~ filters)? }
# A single filter consisting of a name optionally followed by parenthesized options.
filter = { filter_name ~ ("(" ~ options ~ ")")? }
# The name of a filter, starting with an ASCII letter followed by alphanumeric characters or underscores.
filter_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
# A list of comma-separated options within parentheses.
options = { option ~ ("," ~ options)? }
# A single option, optionally with a name followed by an equals sign and a value.
option = { (option_name ~ "=")? ~ option_value }
# The name of an option, starting with an ASCII letter followed by alphanumeric characters or underscores.
option_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
# The value of an option, which can be a JSON number, string, or boolean.
option_value = {
JSON_NUMBER |
JSON_STRING |
JSON_BOOLEAN
}
# JSON number format supporting integers, decimals, and scientific notation.
JSON_NUMBER = @{
("-")? ~
("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~
("." ~ ASCII_DIGIT*)? ~
(("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
}
# JSON string format with escaped characters.
JSON_STRING = ${
"\"" ~
(("\\" ~ ANY) | (!("\"" | "\\") ~ ANY))* ~
"\""
}
# JSON boolean values: true or false.
JSON_BOOLEAN = ${ "true" | "false" }

View File

@@ -1,131 +0,0 @@
use pest::Parser;
use pest_derive::Parser;
use std::collections::HashMap;
#[derive(Parser)]
#[grammar = "filter.pest"]
pub struct FilterParser;
#[derive(Debug)]
pub struct Filter {
pub name: String,
pub options: HashMap<String, serde_json::Value>,
}
pub fn parse_filter_string(input: &str) -> Result<Vec<Filter>, Box<dyn std::error::Error>> {
let mut filters = Vec::new();
let pairs = FilterParser::parse(Rule::filters, input)?;
for pair in pairs {
if pair.as_rule() == Rule::filter {
let mut name = String::new();
let mut options = HashMap::new();
for inner_pair in pair.into_inner() {
match inner_pair.as_rule() {
Rule::filter_name => {
name = inner_pair.as_str().to_string();
}
Rule::options => {
for option_pair in inner_pair.into_inner() {
if option_pair.as_rule() == Rule::option {
let mut option_name = None;
let mut option_value = None;
for option_inner in option_pair.into_inner() {
match option_inner.as_rule() {
Rule::option_name => {
option_name = Some(option_inner.as_str().to_string());
}
Rule::option_value => {
option_value = Some(parse_option_value(option_inner.as_str())?);
}
_ => {}
}
}
if let Some(value) = option_value {
// If no name is provided, use the filter name as the key
let key = option_name.unwrap_or_else(|| name.clone());
options.insert(key, value);
}
}
}
}
_ => {}
}
}
filters.push(Filter { name, options });
}
}
Ok(filters)
}
fn parse_option_value(input: &str) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
// Try to parse as number
if let Ok(num) = input.parse::<i64>() {
return Ok(serde_json::Value::Number(num.into()));
}
if let Ok(num) = input.parse::<f64>() {
if let Some(number) = serde_json::Number::from_f64(num) {
return Ok(serde_json::Value::Number(number));
}
}
// Try to parse as boolean
if let Ok(boolean) = input.parse::<bool>() {
return Ok(serde_json::Value::Bool(boolean));
}
// Treat as string (remove quotes if present)
let value = if input.starts_with('"') && input.ends_with('"') {
input[1..input.len()-1].to_string()
} else if input.starts_with('\'') && input.ends_with('\'') {
input[1..input.len()-1].to_string()
} else {
input.to_string()
};
Ok(serde_json::Value::String(value))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_simple_filter() {
let result = parse_filter_string("grep").unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "grep");
assert!(result[0].options.is_empty());
}
#[test]
fn test_parse_filter_with_options() {
let result = parse_filter_string("head_lines(10)").unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "head_lines");
assert_eq!(result[0].options["head_lines"], 10);
}
#[test]
fn test_parse_filter_with_named_options() {
let result = parse_filter_string("grep(pattern=\"error\")").unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "grep");
assert_eq!(result[0].options["pattern"], "error");
}
#[test]
fn test_parse_multiple_filters() {
let result = parse_filter_string("head_lines(10), grep(pattern=\"error\")").unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].name, "head_lines");
assert_eq!(result[0].options["head_lines"], 10);
assert_eq!(result[1].name, "grep");
assert_eq!(result[1].options["pattern"], "error");
}
}

View File

@@ -194,6 +194,10 @@ pub enum FilterType {
StripAnsi, StripAnsi,
} }
/// Maximum buffer size (256 MB) for filter chain intermediate results.
/// Prevents OOM on large files by rejecting inputs that exceed this limit.
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;
/// A chain of filter plugins applied sequentially. /// A chain of filter plugins applied sequentially.
/// ///
/// Chains multiple filters, applying them in order to the input stream. /// Chains multiple filters, applying them in order to the input stream.
@@ -334,6 +338,18 @@ impl FilterChain {
let mut current_data = Vec::new(); let mut current_data = Vec::new();
std::io::copy(reader, &mut current_data)?; std::io::copy(reader, &mut current_data)?;
if current_data.len() > MAX_FILTER_BUFFER_SIZE {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \
Consider using fewer filter plugins or smaller inputs.",
current_data.len(),
MAX_FILTER_BUFFER_SIZE
),
));
}
// Store the plugins length to avoid borrowing issues // Store the plugins length to avoid borrowing issues
let plugins_len = self.plugins.len(); let plugins_len = self.plugins.len();
@@ -348,6 +364,18 @@ impl FilterChain {
// For intermediate plugins, write to a buffer // For intermediate plugins, write to a buffer
let mut output_vec = Vec::new(); let mut output_vec = Vec::new();
self.plugins[i].filter(&mut input, &mut output_vec)?; self.plugins[i].filter(&mut input, &mut output_vec)?;
if output_vec.len() > MAX_FILTER_BUFFER_SIZE {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Filter output size ({} bytes) exceeds maximum filter buffer size ({} bytes).",
output_vec.len(),
MAX_FILTER_BUFFER_SIZE
),
));
}
current_data = output_vec; current_data = output_vec;
} }
} }

View File

@@ -62,8 +62,13 @@ use crate::meta_plugin::magic_file;
/// Initializes plugins at library load time. /// Initializes plugins at library load time.
/// ///
/// Ensures all filter and meta plugins are registered via their ctors. /// Plugin registration happens automatically via `#[ctor]` constructors
/// Call this early in application startup if needed (though ctors handle most cases). /// when each plugin module is loaded. The explicit module imports in
/// `lib.rs` guarantee this happens at library initialization time.
///
/// This function exists as a public API entry point for callers that
/// want to explicitly ensure plugins are ready. It intentionally does
/// no additional work.
/// ///
/// # Examples /// # Examples
/// ///
@@ -71,8 +76,8 @@ use crate::meta_plugin::magic_file;
/// keep::init_plugins(); /// keep::init_plugins();
/// ``` /// ```
pub fn init_plugins() { pub fn init_plugins() {
// This will be expanded in Step 3 implementation // Plugins self-register via #[ctor] on module load.
// For now, the ctors handle registration // The use-statements in lib.rs guarantee module inclusion.
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -105,16 +105,20 @@ impl MetaPlugin for CwdMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -235,8 +235,10 @@ impl MetaPlugin for DigestMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
@@ -251,8 +253,10 @@ impl MetaPlugin for DigestMetaPlugin {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }

View File

@@ -183,8 +183,10 @@ impl MetaPlugin for EnvMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names based on collected env vars. /// Returns the default output names based on collected env vars.
@@ -212,8 +214,10 @@ impl MetaPlugin for EnvMetaPlugin {
/// # Panics /// # Panics
/// ///
/// Panics with "options_mut() not implemented for EnvMetaPlugin". /// Panics with "options_mut() not implemented for EnvMetaPlugin".
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -244,16 +244,20 @@ impl MetaPlugin for MetaPluginExec {
&self.base.outputs &self.base.outputs
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
&mut self.base.outputs &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(&mut self.base.outputs)
} }
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
&self.base.options &self.base.options
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
&mut self.base.options &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(&mut self.base.options)
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {

View File

@@ -375,8 +375,10 @@ impl MetaPlugin for HostnameMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
@@ -391,8 +393,10 @@ impl MetaPlugin for HostnameMetaPlugin {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -162,8 +162,10 @@ impl MetaPlugin for KeepPidMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names for this plugin. /// Returns the default output names for this plugin.
@@ -189,8 +191,10 @@ impl MetaPlugin for KeepPidMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of options. /// A mutable reference to the `HashMap` of options.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -21,16 +21,23 @@ impl MagicFileMetaPlugin {
) -> MagicFileMetaPlugin { ) -> MagicFileMetaPlugin {
// Start with default options // Start with default options
let mut final_options = std::collections::HashMap::new(); let mut final_options = std::collections::HashMap::new();
final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); final_options.insert(
"max_buffer_size".to_string(),
serde_yaml::Value::Number(PIPESIZE.into()),
);
if let Some(opts) = options { if let Some(opts) = options {
for (key, value) in opts { for (key, value) in opts {
final_options.insert(key, value); final_options.insert(key, value);
} }
} }
// Start with default outputs // Start with default outputs
let mut final_outputs = std::collections::HashMap::new(); let mut final_outputs = std::collections::HashMap::new();
let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]; let default_outputs = vec![
"mime_type".to_string(),
"mime_encoding".to_string(),
"file_type".to_string(),
];
for output_name in default_outputs { for output_name in default_outputs {
final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name)); final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name));
} }
@@ -39,20 +46,24 @@ impl MagicFileMetaPlugin {
final_outputs.insert(key, value); final_outputs.insert(key, value);
} }
} }
let max_buffer_size = final_options.get("max_buffer_size") let max_buffer_size = final_options
.get("max_buffer_size")
.and_then(|v| v.as_u64()) .and_then(|v| v.as_u64())
.unwrap_or(PIPESIZE as u64) as usize; .unwrap_or(PIPESIZE as u64) as usize;
// Ensure the default max_buffer_size is in the options // Ensure the default max_buffer_size is in the options
if !final_options.contains_key("max_buffer_size") { if !final_options.contains_key("max_buffer_size") {
final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into())); final_options.insert(
"max_buffer_size".to_string(),
serde_yaml::Value::Number(PIPESIZE.into()),
);
} }
let mut base = crate::meta_plugin::BaseMetaPlugin::new(); let mut base = crate::meta_plugin::BaseMetaPlugin::new();
base.outputs = final_outputs; base.outputs = final_outputs;
base.options = final_options; base.options = final_options;
MagicFileMetaPlugin { MagicFileMetaPlugin {
buffer: Vec::new(), buffer: Vec::new(),
max_buffer_size, max_buffer_size,
@@ -61,24 +72,33 @@ impl MagicFileMetaPlugin {
base, base,
} }
} }
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> { fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
// Use the existing cookie and just change flags // Use the existing cookie and just change flags
if let Some(cookie) = &self.cookie { if let Some(cookie) = &self.cookie {
cookie.set_flags(flags) cookie.set_flags(flags).map_err(|e| {
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?; io::Error::new(
io::ErrorKind::Other,
format!("Failed to set magic flags: {}", e),
)
})?;
let result = cookie.buffer(&self.buffer) let result = cookie.buffer(&self.buffer).map_err(|e| {
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?; io::Error::new(
io::ErrorKind::Other,
format!("Failed to analyze buffer: {}", e),
)
})?;
// Clean up the result - remove extra whitespace and take first part if needed // Clean up the result - remove extra whitespace and take first part if needed
let trimmed = result.trim(); let trimmed = result.trim();
// For some magic results, we might want just the first part before semicolon or comma // For some magic results, we might want just the first part before semicolon or comma
let cleaned = if trimmed.contains(';') { let cleaned = if trimmed.contains(';') {
trimmed.split(';').next().unwrap_or(trimmed).trim() trimmed.split(';').next().unwrap_or(trimmed).trim()
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) { } else if trimmed.contains(',')
&& flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING)
{
trimmed.split(',').next().unwrap_or(trimmed).trim() trimmed.split(',').next().unwrap_or(trimmed).trim()
} else { } else {
trimmed trimmed
@@ -86,36 +106,39 @@ impl MagicFileMetaPlugin {
Ok(cleaned.to_string()) Ok(cleaned.to_string())
} else { } else {
Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized")) Err(io::Error::new(
io::ErrorKind::Other,
"Magic cookie not initialized",
))
} }
} }
/// Helper function to process all magic types and collect metadata /// Helper function to process all magic types and collect metadata
fn process_magic_types(&self) -> Vec<crate::meta_plugin::MetaData> { fn process_magic_types(&self) -> Vec<crate::meta_plugin::MetaData> {
let mut metadata = Vec::new(); let mut metadata = Vec::new();
// Define the types to process with their corresponding flags // Define the types to process with their corresponding flags
let types_to_process = [ let types_to_process = [
("mime_type", CookieFlags::MIME_TYPE), ("mime_type", CookieFlags::MIME_TYPE),
("mime_encoding", CookieFlags::MIME_ENCODING), ("mime_encoding", CookieFlags::MIME_ENCODING),
("file_type", CookieFlags::default()), ("file_type", CookieFlags::default()),
]; ];
for (name, flags) in types_to_process.iter() { for (name, flags) in types_to_process.iter() {
if let Ok(result) = self.get_magic_result(*flags) { if let Ok(result) = self.get_magic_result(*flags) {
if !result.is_empty() { if !result.is_empty() {
// Use process_metadata_outputs to handle output mapping // Use process_metadata_outputs to handle output mapping
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
name, name,
serde_yaml::Value::String(result), serde_yaml::Value::String(result),
self.base.outputs() self.base.outputs(),
) { ) {
metadata.push(meta_data); metadata.push(meta_data);
} }
} }
} }
} }
metadata metadata
} }
} }
@@ -129,7 +152,7 @@ impl MetaPlugin for MagicFileMetaPlugin {
fn is_finalized(&self) -> bool { fn is_finalized(&self) -> bool {
self.is_finalized self.is_finalized
} }
/// Sets the finalized state of the plugin. /// Sets the finalized state of the plugin.
/// ///
/// # Arguments /// # Arguments
@@ -138,7 +161,7 @@ impl MetaPlugin for MagicFileMetaPlugin {
fn set_finalized(&mut self, finalized: bool) { fn set_finalized(&mut self, finalized: bool) {
self.is_finalized = finalized; self.is_finalized = finalized;
} }
/// Initializes the magic cookie for file type detection. /// Initializes the magic cookie for file type detection.
/// ///
/// Loads the magic database; finalizes if initialization fails. /// Loads the magic database; finalizes if initialization fails.
@@ -206,9 +229,9 @@ impl MetaPlugin for MagicFileMetaPlugin {
is_finalized: true, is_finalized: true,
}; };
} }
let metadata = self.process_magic_types(); let metadata = self.process_magic_types();
// Mark as finalized // Mark as finalized
self.is_finalized = true; self.is_finalized = true;
@@ -244,7 +267,7 @@ impl MetaPlugin for MagicFileMetaPlugin {
is_finalized: true, is_finalized: true,
}; };
} }
let mut metadata = Vec::new(); let mut metadata = Vec::new();
// Only collect up to max_buffer_size // Only collect up to max_buffer_size
@@ -256,7 +279,7 @@ impl MetaPlugin for MagicFileMetaPlugin {
// Check if we've reached our buffer limit and return metadata // Check if we've reached our buffer limit and return metadata
if self.buffer.len() >= self.max_buffer_size { if self.buffer.len() >= self.max_buffer_size {
metadata = self.process_magic_types(); metadata = self.process_magic_types();
// Mark as finalized when we've processed enough data // Mark as finalized when we've processed enough data
self.is_finalized = true; self.is_finalized = true;
} }
@@ -277,8 +300,7 @@ impl MetaPlugin for MagicFileMetaPlugin {
fn meta_type(&self) -> MetaPluginType { fn meta_type(&self) -> MetaPluginType {
MetaPluginType::MagicFile MetaPluginType::MagicFile
} }
/// Returns a reference to the outputs mapping. /// Returns a reference to the outputs mapping.
/// ///
/// # Returns /// # Returns
@@ -287,26 +309,31 @@ impl MetaPlugin for MagicFileMetaPlugin {
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs() self.base.outputs()
} }
/// Returns a mutable reference to the outputs mapping. /// Returns a mutable reference to the outputs mapping.
/// ///
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names for this plugin. /// Returns the default output names for this plugin.
/// ///
/// # Returns /// # Returns
/// ///
/// Vector of default output field names. /// Vector of default output field names.
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()] vec![
"mime_type".to_string(),
"mime_encoding".to_string(),
"file_type".to_string(),
]
} }
/// Returns a reference to the options mapping. /// Returns a reference to the options mapping.
/// ///
/// # Returns /// # Returns
@@ -315,14 +342,16 @@ impl MetaPlugin for MagicFileMetaPlugin {
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options() self.base.options()
} }
/// Returns a mutable reference to the options mapping. /// Returns a mutable reference to the options mapping.
/// ///
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of options. /// A mutable reference to the `HashMap` of options.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }

View File

@@ -187,8 +187,10 @@ impl MetaPlugin for MagicFileMetaPluginImpl {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
@@ -203,11 +205,16 @@ impl MetaPlugin for MagicFileMetaPluginImpl {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
#[cfg(feature = "magic")]
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
#[cfg(not(feature = "magic"))] #[cfg(not(feature = "magic"))]
#[derive(Debug)] #[derive(Debug)]
pub struct FallbackMagicFileMetaPlugin { pub struct FallbackMagicFileMetaPlugin {
@@ -222,21 +229,18 @@ impl FallbackMagicFileMetaPlugin {
pub fn new( pub fn new(
options: Option<std::collections::HashMap<String, serde_yaml::Value>>, options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>, outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
) -> FallbackMagicFileMetaPlugin { ) -> Self {
let mut base = BaseMetaPlugin::new(); let mut base = BaseMetaPlugin::new();
// Set default outputs
let default_outputs = &["mime_type", "mime_encoding", "file_type"]; let default_outputs = &["mime_type", "mime_encoding", "file_type"];
base.initialize_plugin(default_outputs, &options, &outputs); base.initialize_plugin(default_outputs, &options, &outputs);
// Get max_buffer_size from options, default to PIPESIZE
let max_buffer_size = base let max_buffer_size = base
.options .options
.get("max_buffer_size") .get("max_buffer_size")
.and_then(|v| v.as_u64()) .and_then(|v| v.as_u64())
.unwrap_or(crate::common::PIPESIZE as u64) as usize; .unwrap_or(crate::common::PIPESIZE as u64) as usize;
FallbackMagicFileMetaPlugin { Self {
buffer: Vec::new(), buffer: Vec::new(),
max_buffer_size, max_buffer_size,
is_finalized: false, is_finalized: false,
@@ -244,68 +248,75 @@ impl FallbackMagicFileMetaPlugin {
} }
} }
fn run_file_command(&self, buffer: &[u8]) -> io::Result<String> { fn run_file_command(&self, args: &[&str]) -> Option<String> {
let mut temp_file = tempfile::NamedTempFile::new()?;
temp_file.as_ref().write_all(buffer)?;
let output = Command::new("file") let output = Command::new("file")
.arg("-b") .args(args)
.arg("-m") .arg("-")
.arg("all") .stdin(Stdio::piped())
.arg(temp_file.path()) .stdout(Stdio::piped())
.output() .spawn()
.map_err(|e| { .and_then(|mut child| {
io::Error::new( if let Some(mut stdin) = child.stdin.take() {
io::ErrorKind::Other, let _ = stdin.write_all(&self.buffer);
format!("Failed to run file command: {}", e), }
) child.wait_with_output()
})?; });
if !output.status.success() { output
return Err(io::Error::new(io::ErrorKind::Other, "File command failed")); .ok()
} .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
let result = String::from_utf8_lossy(&output.stdout).trim().to_string();
Ok(result)
} }
fn process_file_output(&self, result: &str) -> Vec<MetaData> { fn detect_type(&self) -> Vec<MetaData> {
let mut metadata = Vec::new(); let mut metadata = Vec::new();
// Parse the file command output // Get mime_type and mime_encoding via --mime
// file -m all output format is typically: type; charset=encoding if let Some(mime_line) = self.run_file_command(&["--brief", "--mime"]) {
let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect(); // Format: "text/plain; charset=us-ascii"
let file_type = parts.first().cloned().unwrap_or(result); if let Some((mime_type, rest)) = mime_line.split_once(';') {
let mime_encoding = parts let mime_type = mime_type.trim().to_string();
.get(1) let mime_encoding = rest
.and_then(|s| s.strip_prefix("charset=")) .trim()
.cloned() .strip_prefix("charset=")
.unwrap_or(""); .unwrap_or("binary")
.to_string();
// For mime_type, try to infer from file type or use a heuristic if let Some(meta_data) = process_metadata_outputs(
let mime_type = if file_type.starts_with("text") { "mime_type",
"text/plain" serde_yaml::Value::String(mime_type),
} else if file_type.contains("ASCII") || file_type.contains("UTF-8") { self.base.outputs(),
"text/plain" ) {
} else if file_type.contains("empty") { metadata.push(meta_data);
"application/octet-stream" }
} else { if let Some(meta_data) = process_metadata_outputs(
"application/octet-stream" // default "mime_encoding",
}; serde_yaml::Value::String(mime_encoding),
self.base.outputs(),
) {
metadata.push(meta_data);
}
} else {
// No charset, just mime type
if let Some(meta_data) = process_metadata_outputs(
"mime_type",
serde_yaml::Value::String(mime_line),
self.base.outputs(),
) {
metadata.push(meta_data);
}
}
}
let outputs_to_process = [ // Get human-readable file type via --brief
("mime_type", mime_type), if let Some(file_type) = self.run_file_command(&["--brief"]) {
("mime_encoding", mime_encoding), if !file_type.is_empty() {
("file_type", file_type), if let Some(meta_data) = process_metadata_outputs(
]; "file_type",
serde_yaml::Value::String(file_type),
for (name, value) in outputs_to_process.iter() { self.base.outputs(),
if let Some(meta_data) = process_metadata_outputs( ) {
name, metadata.push(meta_data);
serde_yaml::Value::String(value.to_string()), }
self.base.outputs(),
) {
metadata.push(meta_data);
} }
} }
@@ -324,7 +335,6 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin {
} }
fn initialize(&mut self) -> MetaPluginResponse { fn initialize(&mut self) -> MetaPluginResponse {
// No initialization needed for fallback
MetaPluginResponse { MetaPluginResponse {
metadata: Vec::new(), metadata: Vec::new(),
is_finalized: false, is_finalized: false,
@@ -339,27 +349,18 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin {
}; };
} }
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); let remaining = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 { if remaining > 0 {
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); let n = std::cmp::min(data.len(), remaining);
self.buffer.extend_from_slice(&data[..bytes_to_copy]); self.buffer.extend_from_slice(&data[..n]);
if self.buffer.len() >= self.max_buffer_size { if self.buffer.len() >= self.max_buffer_size {
if let Ok(result) = self.run_file_command(&self.buffer) { let metadata = self.detect_type();
let metadata = self.process_file_output(&result); self.is_finalized = true;
self.is_finalized = true; return MetaPluginResponse {
return MetaPluginResponse { metadata,
metadata, is_finalized: true,
is_finalized: true, };
};
} else {
// On error, finalize with empty metadata
self.is_finalized = true;
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
} }
} }
@@ -376,21 +377,9 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin {
is_finalized: true, is_finalized: true,
}; };
} }
let metadata = if !self.buffer.is_empty() {
if let Ok(result) = self.run_file_command(&self.buffer) {
self.process_file_output(&result)
} else {
Vec::new()
}
} else {
Vec::new()
};
self.is_finalized = true; self.is_finalized = true;
MetaPluginResponse { MetaPluginResponse {
metadata, metadata: self.detect_type(),
is_finalized: true, is_finalized: true,
} }
} }
@@ -403,8 +392,10 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
@@ -419,14 +410,13 @@ impl MetaPlugin for FallbackMagicFileMetaPlugin {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
#[cfg(feature = "magic")]
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
#[cfg(not(feature = "magic"))] #[cfg(not(feature = "magic"))]
pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin; pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin;

View File

@@ -10,7 +10,6 @@ pub mod env;
pub mod exec; pub mod exec;
pub mod hostname; pub mod hostname;
pub mod keep_pid; pub mod keep_pid;
#[cfg(feature = "magic")]
pub mod magic_file; pub mod magic_file;
pub mod read_rate; pub mod read_rate;
pub mod read_time; pub mod read_time;
@@ -179,8 +178,10 @@ impl MetaPlugin for BaseMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
&mut self.outputs &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(&mut self.outputs)
} }
/// Returns a reference to the options mapping. /// Returns a reference to the options mapping.
@@ -197,8 +198,10 @@ impl MetaPlugin for BaseMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of options. /// A mutable reference to the `HashMap` of options.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
&mut self.options &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(&mut self.options)
} }
} }
@@ -424,11 +427,17 @@ where
/// Returns a mutable reference to the outputs mapping. /// Returns a mutable reference to the outputs mapping.
/// ///
/// # Panics /// # Returns
/// ///
/// Panics with "outputs_mut() not implemented for this plugin". /// A mutable reference to the outputs `HashMap`.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { ///
panic!("outputs_mut() not implemented for this plugin") /// # Errors
///
/// Returns an error if the plugin does not support mutable outputs.
fn outputs_mut(
&mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
anyhow::bail!("outputs_mut() not supported by this plugin")
} }
/// Returns a reference to the options mapping. /// Returns a reference to the options mapping.
@@ -445,11 +454,17 @@ where
/// Returns a mutable reference to the options mapping. /// Returns a mutable reference to the options mapping.
/// ///
/// # Panics /// # Returns
/// ///
/// Panics with "options_mut() not implemented for this plugin". /// A mutable reference to the options `HashMap`.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { ///
panic!("options_mut() not implemented for this plugin") /// # Errors
///
/// Returns an error if the plugin does not support mutable options.
fn options_mut(
&mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
anyhow::bail!("options_mut() not supported by this plugin")
} }
/// Gets the default output names this plugin can produce. /// Gets the default output names this plugin can produce.
@@ -496,12 +511,11 @@ pub fn get_meta_plugin(
meta_plugin_type: MetaPluginType, meta_plugin_type: MetaPluginType,
options: Option<std::collections::HashMap<String, serde_yaml::Value>>, options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>, outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
) -> Box<dyn MetaPlugin> { ) -> anyhow::Result<Box<dyn MetaPlugin>> {
let registry = META_PLUGIN_REGISTRY.lock().unwrap(); let registry = META_PLUGIN_REGISTRY.lock().unwrap();
if let Some(constructor) = registry.get(&meta_plugin_type) { if let Some(constructor) = registry.get(&meta_plugin_type) {
return constructor(options, outputs); return Ok(constructor(options, outputs));
} }
// Fallback for unknown plugins anyhow::bail!("Meta plugin {meta_plugin_type:?} not registered")
panic!("Meta plugin {meta_plugin_type:?} not registered");
} }

View File

@@ -193,8 +193,10 @@ impl MetaPlugin for ReadRateMetaPlugin {
/// # Returns /// # Returns
/// ///
/// Mutable reference to the outputs HashMap. /// Mutable reference to the outputs HashMap.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names for this plugin. /// Returns the default output names for this plugin.
@@ -222,8 +224,10 @@ impl MetaPlugin for ReadRateMetaPlugin {
/// # Returns /// # Returns
/// ///
/// Mutable reference to the options HashMap. /// Mutable reference to the options HashMap.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -97,8 +97,10 @@ impl MetaPlugin for ReadTimeMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn default_outputs(&self) -> Vec<String> { fn default_outputs(&self) -> Vec<String> {
@@ -109,8 +111,10 @@ impl MetaPlugin for ReadTimeMetaPlugin {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -194,8 +194,10 @@ impl MetaPlugin for ShellMetaPlugin {
/// # Returns /// # Returns
/// ///
/// * `&mut HashMap<String, serde_yaml::Value>` - Mutable outputs map. /// * `&mut HashMap<String, serde_yaml::Value>` - Mutable outputs map.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names for this plugin. /// Returns the default output names for this plugin.
@@ -221,8 +223,10 @@ impl MetaPlugin for ShellMetaPlugin {
/// # Returns /// # Returns
/// ///
/// * `&mut HashMap<String, serde_yaml::Value>` - Mutable options map. /// * `&mut HashMap<String, serde_yaml::Value>` - Mutable options map.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
/// Registers the shell meta plugin with the global registry. /// Registers the shell meta plugin with the global registry.

View File

@@ -109,16 +109,20 @@ impl MetaPlugin for ShellPidMetaPlugin {
self.base.outputs() self.base.outputs()
} }
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options() self.base.options()
} }
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -769,8 +769,10 @@ impl MetaPlugin for TextMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names for this plugin. /// Returns the default output names for this plugin.
@@ -803,8 +805,10 @@ impl MetaPlugin for TextMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -119,8 +119,10 @@ impl MetaPlugin for UserMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of outputs. /// A mutable reference to the `HashMap` of outputs.
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn outputs_mut(
self.base.outputs_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.outputs_mut())
} }
/// Returns the default output names. /// Returns the default output names.
@@ -151,8 +153,10 @@ impl MetaPlugin for UserMetaPlugin {
/// # Returns /// # Returns
/// ///
/// A mutable reference to the `HashMap` of options. /// A mutable reference to the `HashMap` of options.
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> { fn options_mut(
self.base.options_mut() &mut self,
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
Ok(self.base.options_mut())
} }
} }
use crate::meta_plugin::register_meta_plugin; use crate::meta_plugin::register_meta_plugin;

View File

@@ -206,12 +206,14 @@ pub fn settings_meta_plugin_types(
// Try to find the MetaPluginType by meta name // Try to find the MetaPluginType by meta name
let mut found = false; let mut found = false;
for meta_plugin_type in MetaPluginType::iter() { for meta_plugin_type in MetaPluginType::iter() {
let meta_plugin = if let Ok(meta_plugin) =
crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None); crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), None, None)
if meta_plugin.meta_type().to_string() == trimmed_name { {
meta_plugin_types.push(meta_plugin_type); if meta_plugin.meta_type().to_string() == trimmed_name {
found = true; meta_plugin_types.push(meta_plugin_type);
break; found = true;
break;
}
} }
} }

View File

@@ -53,6 +53,7 @@ struct ServerConfig {
password_file: Option<String>, password_file: Option<String>,
password: Option<String>, password: Option<String>,
password_hash: Option<String>, password_hash: Option<String>,
cors_origin: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
@@ -145,6 +146,7 @@ pub fn mode_generate_config(_cmd: &mut Command, _settings: &crate::config::Setti
password_file: None, password_file: None,
password: None, password: None,
password_hash: None, password_hash: None,
cors_origin: None,
}), }),
compression_plugin: None, compression_plugin: None,
meta_plugins: Some(vec![ meta_plugins: Some(vec![

View File

@@ -1,4 +1,4 @@
use anyhow::{Result, anyhow}; use anyhow::{Context, Result, anyhow};
use std::io::Write; use std::io::Write;
use crate::common::PIPESIZE; use crate::common::PIPESIZE;
@@ -55,7 +55,7 @@ pub fn mode_get(
.find_item(conn, ids, tags, &std::collections::HashMap::new()) .find_item(conn, ids, tags, &std::collections::HashMap::new())
.map_err(|e| anyhow!("Unable to find matching item in database: {}", e))?; .map_err(|e| anyhow!("Unable to find matching item in database: {}", e))?;
let item_id = item_with_meta.item.id.unwrap(); let item_id = item_with_meta.item.id.context("Item missing ID")?;
// Determine if we should detect binary data // Determine if we should detect binary data
let mut detect_binary = !settings.force && std::io::stdout().is_terminal(); let mut detect_binary = !settings.force && std::io::stdout().is_terminal();

View File

@@ -1,7 +1,7 @@
use crate::config; use crate::config;
use crate::modes::common::{OutputFormat, format_size}; use crate::modes::common::{OutputFormat, format_size};
use crate::services::types::ItemWithMeta; use crate::services::types::ItemWithMeta;
use anyhow::{Result, anyhow}; use anyhow::{Context, Result, anyhow};
use clap::Command; use clap::Command;
use clap::error::ErrorKind; use clap::error::ErrorKind;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -141,7 +141,7 @@ fn show_item(
} }
let item = item_with_meta.item; let item = item_with_meta.item;
let item_id = item.id.unwrap(); let item_id = item.id.context("Item missing ID")?;
let item_tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let item_tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect();
let mut table = crate::modes::common::create_table(false); let mut table = crate::modes::common::create_table(false);
@@ -249,7 +249,7 @@ fn show_item_structured(
let item_tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let item_tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect();
let meta_map = item_with_meta.meta_as_map(); let meta_map = item_with_meta.meta_as_map();
let item = item_with_meta.item; let item = item_with_meta.item;
let item_id = item.id.unwrap(); let item_id = item.id.context("Item missing ID")?;
let mut item_path_buf = data_path.clone(); let mut item_path_buf = data_path.clone();
item_path_buf.push(item_id.to_string()); item_path_buf.push(item_id.to_string());

View File

@@ -8,7 +8,7 @@ use crate::modes::common::ColumnType;
use crate::modes::common::{OutputFormat, format_size}; use crate::modes::common::{OutputFormat, format_size};
use crate::services::item_service::ItemService; use crate::services::item_service::ItemService;
use crate::services::types::ItemWithMeta; use crate::services::types::ItemWithMeta;
use anyhow::Result; use anyhow::{Context, Result};
use comfy_table::CellAlignment; use comfy_table::CellAlignment;
use comfy_table::{Attribute, Cell, Color, Row}; use comfy_table::{Attribute, Cell, Color, Row};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -202,7 +202,7 @@ pub fn mode_list(
let item = item_with_meta.item; let item = item_with_meta.item;
let mut item_path = data_path.clone(); let mut item_path = data_path.clone();
item_path.push(item.id.unwrap().to_string()); item_path.push(item.id.context("Item missing ID")?.to_string());
let mut table_row = Row::new(); let mut table_row = Row::new();
@@ -210,7 +210,7 @@ pub fn mode_list(
let column_type = column let column_type = column
.name .name
.parse::<ColumnType>() .parse::<ColumnType>()
.unwrap_or_else(|_| panic!("Unknown column {:?}", column.name)); .with_context(|| format!("Unknown column type {:?} in list format", column.name))?;
let mut meta_name: Option<&str> = None; let mut meta_name: Option<&str> = None;
@@ -343,7 +343,7 @@ fn show_list_structured(
let tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect(); let tags: Vec<String> = item_with_meta.tags.iter().map(|t| t.name.clone()).collect();
let meta = item_with_meta.meta_as_map(); let meta = item_with_meta.meta_as_map();
let item = item_with_meta.item; let item = item_with_meta.item;
let item_id = item.id.unwrap(); let item_id = item.id.context("Item missing ID")?;
let mut item_path = data_path.clone(); let mut item_path = data_path.clone();
item_path.push(item_id.to_string()); item_path.push(item_id.to_string());

View File

@@ -481,7 +481,10 @@ pub async fn handle_post_item(
let metadata = item_with_meta.meta_as_map(); let metadata = item_with_meta.meta_as_map();
let item_info = ItemInfo { let item_info = ItemInfo {
id: item_with_meta.item.id.unwrap(), id: item_with_meta.item.id.ok_or_else(|| {
warn!("Item missing ID");
StatusCode::INTERNAL_SERVER_ERROR
})?,
ts: item_with_meta.item.ts.to_rfc3339(), ts: item_with_meta.item.ts.to_rfc3339(),
size: item_with_meta.item.size, size: item_with_meta.item.size,
compression, compression,
@@ -542,7 +545,10 @@ pub async fn handle_get_item_latest_content(
match item_with_meta { match item_with_meta {
Ok(item) => { Ok(item) => {
let item_id = item.item.id.unwrap(); let item_id = item.item.id.ok_or_else(|| {
warn!("Item missing ID");
StatusCode::INTERNAL_SERVER_ERROR
})?;
let metadata = item.meta_as_map(); let metadata = item.meta_as_map();
// Handle as_meta parameter // Handle as_meta parameter
if params.as_meta { if params.as_meta {
@@ -948,7 +954,10 @@ pub async fn handle_delete_item(
.map_err(handle_item_error)?; .map_err(handle_item_error)?;
let item_info = ItemInfo { let item_info = ItemInfo {
id: deleted_item.id.unwrap(), id: deleted_item.id.ok_or_else(|| {
warn!("Item missing ID");
StatusCode::INTERNAL_SERVER_ERROR
})?,
ts: deleted_item.ts.to_rfc3339(), ts: deleted_item.ts.to_rfc3339(),
size: deleted_item.size, size: deleted_item.size,
compression: deleted_item.compression, compression: deleted_item.compression,
@@ -1001,7 +1010,10 @@ pub async fn handle_get_item_info(
let metadata = item_with_meta.meta_as_map(); let metadata = item_with_meta.meta_as_map();
let item_info = ItemInfo { let item_info = ItemInfo {
id: item_with_meta.item.id.unwrap(), id: item_with_meta.item.id.ok_or_else(|| {
warn!("Item missing ID");
StatusCode::INTERNAL_SERVER_ERROR
})?,
ts: item_with_meta.item.ts.to_rfc3339(), ts: item_with_meta.item.ts.to_rfc3339(),
size: item_with_meta.item.size, size: item_with_meta.item.size,
compression: item_with_meta.item.compression.clone(), compression: item_with_meta.item.compression.clone(),

View File

@@ -27,6 +27,7 @@ use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::time::Instant; use std::time::Instant;
use subtle::ConstantTimeEq;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use utoipa::ToSchema; use utoipa::ToSchema;
@@ -75,6 +76,12 @@ pub struct ServerConfig {
/// ///
/// When both cert_file and key_file are set, the server uses HTTPS. /// When both cert_file and key_file are set, the server uses HTTPS.
pub key_file: Option<PathBuf>, pub key_file: Option<PathBuf>,
/// Optional CORS allowed origin.
///
/// When set, cross-origin requests are restricted to this origin.
/// Defaults to "http://localhost" if not specified. Use "*" to allow
/// all origins (not recommended for production).
pub cors_origin: Option<String>,
} }
/// Application state shared across all routes. /// Application state shared across all routes.
@@ -661,8 +668,11 @@ fn check_bearer_auth(
return pwhash::unix::verify(provided_password, hash); return pwhash::unix::verify(provided_password, hash);
} }
// Otherwise, do direct comparison // Otherwise, do constant-time comparison to prevent timing attacks
provided_password == expected_password provided_password
.as_bytes()
.ct_eq(expected_password.as_bytes())
.into()
} }
/// Validates basic authentication credentials. /// Validates basic authentication credentials.
@@ -704,9 +714,12 @@ fn check_basic_auth(
return pwhash::unix::verify(provided_password, hash); return pwhash::unix::verify(provided_password, hash);
} }
// Otherwise, do direct comparison // Otherwise, do constant-time comparison to prevent timing attacks
let expected_credentials = format!("keep:{expected_password}"); let expected_credentials = format!("keep:{expected_password}");
return decoded_str == expected_credentials; return decoded_str
.as_bytes()
.ct_eq(expected_credentials.as_bytes())
.into();
} }
} }
} }

View File

@@ -54,6 +54,7 @@ pub fn mode_server(
password_hash: settings.server_password_hash(), password_hash: settings.server_password_hash(),
cert_file: settings.server_cert_file(), cert_file: settings.server_cert_file(),
key_file: settings.server_key_file(), key_file: settings.server_key_file(),
cors_origin: settings.server_cors_origin(),
}; };
// Create ItemService once // Create ItemService once
@@ -122,6 +123,31 @@ async fn run_server(
create_auth_middleware(config.password.clone(), config.password_hash.clone()), create_auth_middleware(config.password.clone(), config.password_hash.clone()),
)); ));
// Build CORS layer - restricted by default, configurable via cors_origin setting
let cors_origin = config.cors_origin.as_deref().unwrap_or("http://localhost");
let cors_layer = if cors_origin == "*" {
CorsLayer::permissive()
} else {
CorsLayer::new()
.allow_origin(
cors_origin
.parse::<axum::http::HeaderValue>()
.unwrap_or_else(|_| {
log::warn!(
"Invalid CORS origin '{cors_origin}', defaulting to http://localhost"
);
"http://localhost".parse().unwrap()
}),
)
.allow_methods([
axum::http::Method::GET,
axum::http::Method::POST,
axum::http::Method::PUT,
axum::http::Method::DELETE,
])
.allow_headers(tower_http::cors::Any)
};
// Create the app with documentation routes open and others protected // Create the app with documentation routes open and others protected
let app = Router::new() let app = Router::new()
// Add documentation routes without authentication // Add documentation routes without authentication
@@ -135,11 +161,25 @@ async fn run_server(
.layer( .layer(
ServiceBuilder::new() ServiceBuilder::new()
.layer(TraceLayer::new_for_http()) .layer(TraceLayer::new_for_http())
.layer(CorsLayer::permissive()), .layer(cors_layer),
); );
let addr: SocketAddr = bind_address.parse()?; let addr: SocketAddr = bind_address.parse()?;
// Warn if password auth is enabled without TLS
if config.password.is_some() || config.password_hash.is_some() {
#[cfg(not(feature = "tls"))]
log::warn!(
"SECURITY: Password authentication enabled but TLS support is not compiled in. Password will be transmitted in plain text!"
);
#[cfg(feature = "tls")]
if config.cert_file.is_none() || config.key_file.is_none() {
log::warn!(
"SECURITY: Password authentication enabled but TLS is not configured. Password will be transmitted in plain text!"
);
}
}
// Build the app into a service // Build the app into a service
let service = app.into_make_service_with_connect_info::<SocketAddr>(); let service = app.into_make_service_with_connect_info::<SocketAddr>();

View File

@@ -78,7 +78,9 @@ fn build_meta_plugins_configured_table(status_info: &StatusInfo) -> Option<Table
}; };
// First, create a default plugin to get its default options // First, create a default plugin to get its default options
let default_plugin = get_meta_plugin(meta_plugin_type.clone(), None, None); let Ok(default_plugin) = get_meta_plugin(meta_plugin_type.clone(), None, None) else {
continue;
};
// Start with the default options // Start with the default options
let mut effective_options = default_plugin.options().clone(); let mut effective_options = default_plugin.options().clone();
@@ -96,14 +98,18 @@ fn build_meta_plugins_configured_table(status_info: &StatusInfo) -> Option<Table
.collect(); .collect();
// Create the actual plugin with merged options - the constructor will handle setting up outputs // Create the actual plugin with merged options - the constructor will handle setting up outputs
let actual_plugin = get_meta_plugin( let Ok(actual_plugin) = get_meta_plugin(
meta_plugin_type.clone(), meta_plugin_type.clone(),
Some(effective_options.clone()), Some(effective_options.clone()),
Some(outputs_converted), Some(outputs_converted),
); ) else {
continue;
};
// Get the default plugin to see its default options // Get the default plugin to see its default options
let default_plugin = get_meta_plugin(meta_plugin_type.clone(), None, None); let Ok(default_plugin) = get_meta_plugin(meta_plugin_type.clone(), None, None) else {
continue;
};
// Start with the default options // Start with the default options
let mut all_options = default_plugin.options().clone(); let mut all_options = default_plugin.options().clone();

View File

@@ -92,7 +92,9 @@ fn build_meta_plugin_table(
}; };
// Create a default plugin to get its default options // Create a default plugin to get its default options
let default_plugin = get_meta_plugin(meta_plugin_type.clone(), None, None); let Ok(default_plugin) = get_meta_plugin(meta_plugin_type.clone(), None, None) else {
continue;
};
// Get and sort options // Get and sort options
let mut options: Vec<_> = default_plugin.options().iter().collect(); let mut options: Vec<_> = default_plugin.options().iter().collect();

View File

@@ -1,30 +0,0 @@
WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
filters = { filter ~ ("," ~ filters)? }
filter = { filter_name ~ ("(" ~ options ~ ")")? }
filter_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
options = { option ~ ("," ~ options)? }
option = { (option_name ~ "=")? ~ option_value }
option_name = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
option_value = {
JSON_NUMBER |
JSON_STRING |
JSON_BOOLEAN
}
JSON_NUMBER = @{
("-")? ~
("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~
("." ~ ASCII_DIGIT*)? ~
(("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
}
JSON_STRING = ${
"\"" ~
(("\\" ~ ANY) | (!("\"" | "\\") ~ ANY))* ~
"\""
}
JSON_BOOLEAN = ${ "true" | "false" }

View File

@@ -1,119 +0,0 @@
use pest::Parser;
use pest_derive::Parser;
use std::collections::HashMap;
use serde_json;
#[derive(Parser)]
#[grammar = "filter.pest"]
pub struct FilterParser;
#[derive(Debug)]
pub struct Filter {
pub name: String,
pub options: HashMap<String, serde_json::Value>,
}
pub fn parse_filter_string(input: &str) -> Result<Vec<Filter>, Box<dyn std::error::Error>> {
let mut filters = Vec::new();
let pairs = FilterParser::parse(<FilterParser as pest::Parser>::Rule::filters, input)?;
for pair in pairs {
if pair.as_rule() == <FilterParser as pest::Parser>::Rule::filter {
let mut name = String::new();
let mut options = HashMap::new();
for inner_pair in pair.into_inner() {
match inner_pair.as_rule() {
<FilterParser as pest::Parser>::Rule::filter_name => {
name = inner_pair.as_str().to_string();
}
<FilterParser as pest::Parser>::Rule::options => {
for option_pair in inner_pair.into_inner() {
if option_pair.as_rule() == <FilterParser as pest::Parser>::Rule::option {
let mut option_name = None;
let mut option_value = None;
for option_inner in option_pair.into_inner() {
match option_inner.as_rule() {
<FilterParser as pest::Parser>::Rule::option_name => {
option_name = Some(option_inner.as_str().to_string());
}
<FilterParser as pest::Parser>::Rule::option_value => {
option_value = Some(parse_option_value(option_inner.as_str())?);
}
_ => {}
}
}
if let Some(value) = option_value {
// If no name is provided, use the filter name as the key
let key = option_name.unwrap_or_else(|| name.clone());
options.insert(key, value);
}
}
}
}
_ => {}
}
}
filters.push(Filter { name, options });
}
}
Ok(filters)
}
fn parse_option_value(input: &str) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
serde_json::from_str(input).map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_simple_filter() {
let result = parse_filter_string("grep").unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "grep");
assert!(result[0].options.is_empty());
}
#[test]
fn test_parse_filter_with_options() {
let result = parse_filter_string("head_lines(10)").unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "head_lines");
assert_eq!(result[0].options.len(), 1);
if let serde_json::Value::Number(n) = result[0].options.get("head_lines").unwrap() {
assert_eq!(n.as_i64(), Some(10));
} else {
panic!("Expected number");
}
}
#[test]
fn test_parse_filter_with_named_options() {
let result = parse_filter_string(r#"grep(pattern="error")"#).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].name, "grep");
assert_eq!(result[0].options.get("pattern").unwrap().as_str(), Some("error"));
}
#[test]
fn test_parse_multiple_filters() {
let result = parse_filter_string(r#"head_lines(10),grep(pattern="error")"#).unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].name, "head_lines");
assert_eq!(result[0].options.len(), 1);
if let serde_json::Value::Number(n) = result[0].options.get("head_lines").unwrap() {
assert_eq!(n.as_i64(), Some(10));
} else {
panic!("Expected number");
}
assert_eq!(result[1].name, "grep");
assert_eq!(result[1].options.len(), 1);
assert_eq!(result[1].options.get("pattern").unwrap().as_str(), Some("error"));
}
}

View File

@@ -1,15 +0,0 @@
/// Parsing utilities for filters and other inputs.
///
/// This module provides tools for parsing filter strings and other structured
/// inputs used throughout the application. Currently, it includes a pest-based
/// parser for filter expressions.
///
/// # Examples
///
/// ```
/// use keep::parser::parse_filter_string;
/// let filters = parse_filter_string("head:5|grep:hello").unwrap();
/// ```
pub mod filter_parser;
pub use filter_parser::{FilterParser, parse_filter_string};

View File

@@ -155,11 +155,10 @@ impl AsyncItemService {
.map(|s| s.to_string()) .map(|s| s.to_string())
.unwrap_or_else(|| "application/octet-stream".to_string()); .unwrap_or_else(|| "application/octet-stream".to_string());
let is_binary = if let Some(text_val) = metadata.get("text") { let is_binary = crate::common::is_binary::is_content_binary_from_metadata(
text_val == "false" &metadata,
} else { &content_clone,
crate::common::is_binary::is_binary(&content_clone) );
};
Ok::<_, CoreError>((mime_type, is_binary)) Ok::<_, CoreError>((mime_type, is_binary))
}) })

View File

@@ -234,18 +234,14 @@ impl ItemService {
compression: &str, compression: &str,
metadata: &HashMap<String, String>, metadata: &HashMap<String, String>,
) -> Result<bool, CoreError> { ) -> Result<bool, CoreError> {
// Check if we already have text metadata
if let Some(text_val) = metadata.get("text") {
return Ok(text_val == "false");
}
// Read only the first 8192 bytes for binary detection // Read only the first 8192 bytes for binary detection
let mut sample_reader = self let mut sample_reader = self
.compression_service .compression_service
.stream_item_content(item_path, compression)?; .stream_item_content(item_path, compression)?;
let mut sample_buffer = vec![0; 8192]; let mut sample_buffer = vec![0; 8192];
let bytes_read = sample_reader.read(&mut sample_buffer)?; let bytes_read = sample_reader.read(&mut sample_buffer)?;
Ok(crate::common::is_binary::is_binary( Ok(crate::common::is_binary::is_content_binary_from_metadata(
metadata,
&sample_buffer[..bytes_read], &sample_buffer[..bytes_read],
)) ))
} }
@@ -516,7 +512,9 @@ impl ItemService {
let mut result = Vec::new(); let mut result = Vec::new();
for item in items { for item in items {
let item_id = item.id.unwrap(); let item_id = item
.id
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
let tags = tags_map.get(&item_id).cloned().unwrap_or_default(); let tags = tags_map.get(&item_id).cloned().unwrap_or_default();
let meta_hm = meta_map_db.get(&item_id).cloned().unwrap_or_default(); let meta_hm = meta_map_db.get(&item_id).cloned().unwrap_or_default();
let meta = meta_hm let meta = meta_hm
@@ -636,7 +634,9 @@ impl ItemService {
let mut item; let mut item;
{ {
item = db::create_item(conn, compression_type.clone())?; item = db::create_item(conn, compression_type.clone())?;
item_id = item.id.unwrap(); item_id = item
.id
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
debug!("ITEM_SERVICE: Created new item with id: {item_id}"); debug!("ITEM_SERVICE: Created new item with id: {item_id}");
db::set_item_tags(conn, item.clone(), tags)?; db::set_item_tags(conn, item.clone(), tags)?;
debug!("ITEM_SERVICE: Set tags for item {item_id}"); debug!("ITEM_SERVICE: Set tags for item {item_id}");
@@ -770,7 +770,9 @@ impl ItemService {
{ {
item = db::create_item(conn, compression_type.clone())?; item = db::create_item(conn, compression_type.clone())?;
item_id = item.id.unwrap(); item_id = item
.id
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
debug!("ITEM_SERVICE: Created MCP item with id: {item_id}"); debug!("ITEM_SERVICE: Created MCP item with id: {item_id}");
// Add tags // Add tags

View File

@@ -21,7 +21,7 @@ impl MetaService {
// Create plugins with their configuration // Create plugins with their configuration
let meta_plugins: Vec<Box<dyn MetaPlugin>> = meta_plugin_types let meta_plugins: Vec<Box<dyn MetaPlugin>> = meta_plugin_types
.iter() .iter()
.map(|meta_plugin_type| { .filter_map(|meta_plugin_type| {
debug!("META_SERVICE: Creating plugin: {meta_plugin_type:?}"); debug!("META_SERVICE: Creating plugin: {meta_plugin_type:?}");
// Get the plugin name using strum's Display implementation // Get the plugin name using strum's Display implementation
@@ -52,7 +52,13 @@ impl MetaService {
(None, None) (None, None)
}; };
crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), options, outputs) match crate::meta_plugin::get_meta_plugin(meta_plugin_type.clone(), options, outputs) {
Ok(plugin) => Some(plugin),
Err(e) => {
log::warn!("META_SERVICE: Failed to create plugin {meta_plugin_type:?}: {e}, skipping");
None
}
}
}) })
.collect(); .collect();

View File

@@ -73,7 +73,9 @@ impl SyncDataService {
reader.read_to_end(&mut content)?; reader.read_to_end(&mut content)?;
let item = self.save_item(&*content, &mut cmd, settings, &mut tags, conn)?; let item = self.save_item(&*content, &mut cmd, settings, &mut tags, conn)?;
let item_id = item.id.unwrap(); let item_id = item
.id
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
// Set metadata // Set metadata
for (key, value) in metadata { for (key, value) in metadata {
@@ -109,85 +111,8 @@ impl SyncDataService {
compress: bool, compress: bool,
run_meta: bool, run_meta: bool,
) -> Result<ItemWithMeta, CoreError> { ) -> Result<ItemWithMeta, CoreError> {
let mut cmd = Command::new("keep"); let mut cursor = Cursor::new(content);
let settings = &self.settings; self.save_item_raw_streaming(conn, &mut cursor, tags, metadata, compress, run_meta)
let mut tags = tags;
if tags.is_empty() {
tags.push("none".to_string());
}
let compression_type = if compress {
settings_compression_type(&mut cmd, settings)
} else {
CompressionType::None
};
let compression_engine = get_compression_engine(compression_type.clone())?;
let item_id;
let mut item;
{
item = crate::db::create_item(conn, compression_type.clone())?;
item_id = item.id.unwrap();
crate::db::set_item_tags(conn, item.clone(), &tags)?;
}
// Initialize meta plugins if requested
let meta_service = MetaService::new();
let mut plugins = if run_meta {
meta_service.get_plugins(&mut cmd, settings)
} else {
Vec::new()
};
if run_meta {
meta_service.initialize_plugins(&mut plugins, conn, item_id);
}
// Write content to file
let mut item_path = self.item_service.get_data_path().clone();
item_path.push(item_id.to_string());
let mut item_out = compression_engine.create(item_path)?;
let mut total_bytes = 0i64;
const PIPESIZE: usize = 65536;
if run_meta && !plugins.is_empty() {
// Process in chunks for meta plugins
let mut offset = 0;
while offset < content.len() {
let end = std::cmp::min(offset + PIPESIZE, content.len());
let chunk = &content[offset..end];
item_out.write_all(chunk)?;
total_bytes += chunk.len() as i64;
meta_service.process_chunk(&mut plugins, chunk, conn, item_id);
offset = end;
}
} else {
// Write all at once, no meta processing
item_out.write_all(content)?;
total_bytes = content.len() as i64;
}
item_out.flush()?;
drop(item_out);
// Finalize meta plugins
if run_meta {
meta_service.finalize_plugins(&mut plugins, conn, item_id);
}
// Add client-provided metadata
for (key, value) in &metadata {
crate::db::add_meta(conn, item_id, key, value)?;
}
item.size = Some(total_bytes);
crate::db::update_item(conn, item)?;
self.get_item(conn, item_id)
} }
/// Save an item from a streaming reader with granular control over compression. /// Save an item from a streaming reader with granular control over compression.
@@ -224,7 +149,9 @@ impl SyncDataService {
let mut item; let mut item;
{ {
item = crate::db::create_item(conn, compression_type.clone())?; item = crate::db::create_item(conn, compression_type.clone())?;
item_id = item.id.unwrap(); item_id = item
.id
.ok_or_else(|| CoreError::InvalidInput("Item missing ID".to_string()))?;
crate::db::set_item_tags(conn, item.clone(), &tags)?; crate::db::set_item_tags(conn, item.clone(), &tags)?;
} }
@@ -246,7 +173,7 @@ impl SyncDataService {
let mut item_out = compression_engine.create(item_path)?; let mut item_out = compression_engine.create(item_path)?;
let mut buffer = [0u8; 65536]; let mut buffer = [0u8; crate::common::PIPESIZE];
let mut total_bytes = 0i64; let mut total_bytes = 0i64;
loop { loop {