Files
keep/src/compression_engine/mod.rs
Andrew Phillips 8a8a6e1c4b fix: correct critical bugs and improve pipe streaming performance
Critical bug fixes:
- save_item now returns real Item from database, not a hardcoded fake
- AsyncDataService::save() reuses self.sync_service instead of creating redundant instance
- GenerateStatus trait signature mismatch fixed (CLI/API decoupling)

Performance improvements (pipe path untouched):
- CompressionEngine::open() returns Box<dyn Read + Send> enabling true streaming
- mode_get eliminates triple full-file read (was sampling then re-reading entire file)
- FilteringReader adds fast-path bypass when no filters, pre-allocates temp buffer
- text.rs meta plugin processes &[u8] slice directly, eliminates data.to_vec() clone

API correctness:
- Tag parse errors now return 400 instead of being silently discarded
- compute_diff uses similar crate (LCS-based) instead of naive positional comparison

Cleanup:
- Modernize string formatting (format!({x})) across codebase
- Remove redundant DB query in get mode
- Derive Debug/ToSchema on public types
- Delete placeholder test files with no real assertions
- Extract parse_comma_tags utility function
2026-03-11 20:45:05 -03:00

230 lines
7.2 KiB
Rust

use anyhow::{anyhow, Result};
use std::io;
use std::io::{Read, Write};
use std::path::PathBuf;
use strum::IntoEnumIterator;
use strum::{Display, EnumIter, EnumString};
use log::*;
use lazy_static::lazy_static;
extern crate enum_map;
use enum_map::enum_map;
use enum_map::{Enum, EnumMap};
pub mod gzip;
pub mod lz4;
pub mod none;
pub mod program;
use crate::compression_engine::program::CompressionEngineProgram;
/// Enum representing different compression types supported by the system.
///
/// This enum defines all supported compression formats that can be used for
/// storing and retrieving compressed items. Each variant corresponds to a
/// specific compression algorithm or no compression.
///
/// The derived `Display` renders a variant exactly as it is spelled in the
/// source (there is no `serialize_all` override), whereas parsing through the
/// derived `FromStr` ignores ASCII case because of `ascii_case_insensitive`.
///
/// # Examples
///
/// ```
/// use keep::compression_engine::CompressionType;
/// // Display keeps the variant's own spelling.
/// assert_eq!(CompressionType::GZip.to_string(), "GZip");
/// // Parsing is ASCII case-insensitive.
/// assert_eq!("gzip".parse::<CompressionType>().unwrap(), CompressionType::GZip);
/// ```
#[derive(Debug, Eq, PartialEq, Clone, EnumIter, Display, EnumString, enum_map::Enum)]
#[strum(ascii_case_insensitive)]
pub enum CompressionType {
/// LZ4; the engine registry in this module maps it to the built-in `lz4` engine.
LZ4,
/// GZip; mapped to the external `gzip` program, or to the built-in `gzip`
/// engine when the `gzip` cargo feature is enabled.
GZip,
/// BZip2; mapped to the external `bzip2` program.
BZip2,
/// XZ; mapped to the external `xz` program.
XZ,
/// Zstandard; mapped to the external `zstd` program.
ZStd,
/// No compression; mapped to the `none` engine.
None,
}
/// Common interface implemented by every compression backend.
///
/// An engine knows how to open an existing file for decompressed reading and
/// how to create a new file for compressed writing. The remaining methods
/// carry default implementations that describe a built-in, always-available
/// engine; backends override them where that description does not hold.
///
/// Implementors must be `Send + Sync`, and trait objects are cloned through
/// [`CompressionEngine::clone_box`].
///
/// # Errors
///
/// Fallible methods report failures through the module's `Result` alias
/// (`anyhow::Result`), e.g. for I/O problems or files that cannot be opened.
///
/// # Examples
///
/// ```ignore
/// // Requires a concrete engine implementation.
/// use keep::compression_engine::CompressionEngine;
/// let engine = /* some engine */;
/// let reader = engine.open("file.gz".into()).unwrap();
/// ```
pub trait CompressionEngine: Send + Sync {
    /// Opens the file at `file_path` and returns a reader over its
    /// decompressed contents.
    ///
    /// # Arguments
    ///
    /// * `file_path` - Location of the compressed file.
    ///
    /// # Returns
    ///
    /// A boxed `Read + Send` stream that yields decompressed bytes.
    ///
    /// # Errors
    ///
    /// Implementation-defined; typically the file is missing, unreadable, or
    /// not valid for this engine.
    fn open(&self, file_path: PathBuf) -> Result<Box<dyn Read + Send>>;

    /// Creates the file at `file_path` and returns a writer that compresses
    /// everything written to it.
    ///
    /// # Arguments
    ///
    /// * `file_path` - Location where the compressed file is created.
    ///
    /// # Returns
    ///
    /// A boxed `Write` sink that stores compressed bytes.
    ///
    /// # Errors
    ///
    /// Implementation-defined; typically an invalid path or missing
    /// permissions.
    fn create(&self, file_path: PathBuf) -> Result<Box<dyn Write>>;

    /// Reports whether this engine can be used on the current system.
    ///
    /// The default answer is `true`; engines that depend on something
    /// external are expected to override it.
    fn is_supported(&self) -> bool {
        true
    }

    /// Reports whether this engine is built in (`true`) or backed by an
    /// external program (`false`).
    ///
    /// The default answer is `true`.
    fn is_internal(&self) -> bool {
        true
    }

    /// Describes the engine as a `(binary, compress_command, decompress_command)`
    /// triple.
    ///
    /// The default describes an internal engine: the marker `"<INTERNAL>"`
    /// followed by two empty strings.
    fn get_status_info(&self) -> (String, String, String) {
        (String::from("<INTERNAL>"), String::new(), String::new())
    }

    /// Streams the decompressed contents of `file_path` into `writer`.
    ///
    /// # Arguments
    ///
    /// * `file_path` - Location of the compressed file.
    /// * `writer` - Destination for the decompressed bytes.
    ///
    /// # Errors
    ///
    /// Propagates any failure from [`CompressionEngine::open`], from the
    /// byte copy, or from flushing `writer`.
    fn copy(&self, file_path: PathBuf, writer: &mut dyn Write) -> Result<()> {
        let mut decoded = self.open(file_path)?;
        io::copy(&mut decoded, writer)?;
        // Flush explicitly so buffered output reaches its destination (and
        // any flush error is reported) before success is returned.
        writer.flush()?;
        Ok(())
    }

    /// Produces an independent boxed copy of this engine.
    ///
    /// This is the hook that lets `Box<dyn CompressionEngine>` implement
    /// `Clone`.
    fn clone_box(&self) -> Box<dyn CompressionEngine>;
}
/// Lets boxed engine trait objects be cloned by delegating to
/// `CompressionEngine::clone_box` on the boxed value.
impl Clone for Box<dyn CompressionEngine> {
    fn clone(&self) -> Self {
        // Dereference through the reference and the box to reach the engine
        // itself, so the trait method (not this `Clone` impl) is invoked.
        (**self).clone_box()
    }
}
lazy_static! {
    /// Registry holding one engine instance per [`CompressionType`].
    ///
    /// LZ4 and `None` use built-in engines. BZip2, XZ and ZStd shell out to
    /// the program of the same name. GZip uses the built-in engine when the
    /// `gzip` cargo feature is enabled and the external `gzip` program
    /// otherwise.
    static ref COMPRESSION_ENGINES: EnumMap<CompressionType, Box<dyn CompressionEngine>> = {
        /// Builds an engine backed by an external program that compresses
        /// with `-c` and decompresses with `-d -c`.
        fn program_engine(binary: &'static str) -> Box<dyn CompressionEngine> {
            Box::new(CompressionEngineProgram::new(
                binary,
                vec!["-c"],
                vec!["-d", "-c"],
            ))
        }

        // The GZip engine is selected at compile time. Choosing it through
        // cfg-gated functions (instead of building the map and then
        // overwriting one slot) avoids an `unused_mut` warning when the
        // feature is off and avoids constructing a program engine that is
        // immediately thrown away when the feature is on.
        /// Built-in GZip engine (`gzip` feature enabled).
        #[cfg(feature = "gzip")]
        fn gzip_engine() -> Box<dyn CompressionEngine> {
            Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
        }

        /// External `gzip` program engine (`gzip` feature disabled).
        #[cfg(not(feature = "gzip"))]
        fn gzip_engine() -> Box<dyn CompressionEngine> {
            program_engine("gzip")
        }

        enum_map! {
            CompressionType::LZ4 => Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new()) as Box<dyn CompressionEngine>,
            CompressionType::GZip => gzip_engine(),
            CompressionType::BZip2 => program_engine("bzip2"),
            CompressionType::XZ => program_engine("xz"),
            CompressionType::ZStd => program_engine("zstd"),
            CompressionType::None => Box::new(crate::compression_engine::none::CompressionEngineNone::new()) as Box<dyn CompressionEngine>,
        }
    };
}
/// Returns the compression type used when none is requested explicitly.
///
/// # Returns
///
/// Always [`CompressionType::LZ4`].
pub fn default_compression_type() -> CompressionType {
    // Named so the project-wide default is stated in exactly one place.
    const DEFAULT_TYPE: CompressionType = CompressionType::LZ4;
    DEFAULT_TYPE
}
/// Looks up the engine registered for `ct` and returns a boxed clone of it.
///
/// # Arguments
///
/// * `ct` - The compression type whose engine is wanted.
///
/// # Returns
///
/// A fresh `Box<dyn CompressionEngine>` cloned from the shared registry.
///
/// # Errors
///
/// Returns an error naming the compression type when the registered engine
/// reports `is_supported() == false`.
pub fn get_compression_engine(ct: CompressionType) -> Result<Box<dyn CompressionEngine>> {
    // Indexing the map consumes its key, so a clone is passed and `ct` stays
    // available for the error message below.
    let registered = &COMPRESSION_ENGINES[ct.clone()];

    if !registered.is_supported() {
        return Err(anyhow!("Compression engine for {} is not supported", ct));
    }

    Ok(registered.clone())
}