chore: code review cleanup — fixes, deps, docs

Fixed:
- CLI help typo: "metatdata" -> "metadata"
- Filter buffer OOM: check size before loading into memory

Changed:
- #[inline] on HTML escape helpers for hot path performance
- Replaced once_cell and lazy_static with std::sync::LazyLock
- Removed unused once_cell and lazy_static crate dependencies

Refactored:
- Added module-level doc to services/ module

Documentation:
- README.md: zstd is native not external, "none" -> "raw"
- DESIGN.md: current schema and meta plugins section
- CHANGELOG.md: Unreleased section populated
This commit is contained in:
2026-03-21 11:44:37 -03:00
parent ab2fb07505
commit b3edfe7de6
15 changed files with 176 additions and 135 deletions

View File

@@ -29,9 +29,7 @@ pub struct ModeArgs {
pub save: bool,
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))]
#[arg(help(
"Get an item either by it's ID or by a combination of matching tags and metatdata"
))]
#[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
pub get: bool,
#[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))]
@@ -48,9 +46,7 @@ pub struct ModeArgs {
pub delete: bool,
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))]
#[arg(help(
"Get an item either by it's ID or by a combination of matching tags and metatdata"
))]
#[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
pub info: bool,
#[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))]

View File

@@ -7,8 +7,6 @@ use strum::{Display, EnumIter, EnumString};
use log::*;
use lazy_static::lazy_static;
extern crate enum_map;
use enum_map::enum_map;
use enum_map::{Enum, EnumMap};
@@ -180,63 +178,65 @@ impl Clone for Box<dyn CompressionEngine> {
}
}
lazy_static! {
static ref COMPRESSION_ENGINES: EnumMap<CompressionType, Box<dyn CompressionEngine>> = {
#[allow(unused_mut)] // mut needed when gzip/lz4 features are enabled
let mut em = enum_map! {
CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"lz4",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"gzip",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"bzip2",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"xz",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"zstd",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
};
#[cfg(feature = "gzip")]
{
em[CompressionType::GZip] =
Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
as Box<dyn CompressionEngine>;
}
#[cfg(feature = "lz4")]
{
em[CompressionType::LZ4] =
Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
as Box<dyn CompressionEngine>;
}
#[cfg(feature = "zstd")]
{
em[CompressionType::ZStd] =
Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
as Box<dyn CompressionEngine>;
}
em
fn init_compression_engines() -> EnumMap<CompressionType, Box<dyn CompressionEngine>> {
#[allow(unused_mut)]
let mut em: EnumMap<CompressionType, Box<dyn CompressionEngine>> = enum_map! {
CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"lz4",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"gzip",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"bzip2",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"xz",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"zstd",
vec!["-c"],
vec!["-d", "-c"]
)) as Box<dyn CompressionEngine>,
CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
};
#[cfg(feature = "gzip")]
{
em[CompressionType::GZip] =
Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
as Box<dyn CompressionEngine>;
}
#[cfg(feature = "lz4")]
{
em[CompressionType::LZ4] =
Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
as Box<dyn CompressionEngine>;
}
#[cfg(feature = "zstd")]
{
em[CompressionType::ZStd] =
Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
as Box<dyn CompressionEngine>;
}
em
}
static COMPRESSION_ENGINES: std::sync::LazyLock<
EnumMap<CompressionType, Box<dyn CompressionEngine>>,
> = std::sync::LazyLock::new(init_compression_engines);
pub fn default_compression_type() -> CompressionType {
CompressionType::LZ4
}

View File

@@ -1,6 +1,5 @@
use anyhow::{Context, Error, Result, anyhow};
use chrono::prelude::*;
use lazy_static::lazy_static;
use log::*;
use rusqlite::{Connection, OpenFlags, Row, params};
use rusqlite_migration::{M, Migrations};
@@ -47,25 +46,21 @@ let id = db::insert_item(&conn, item)?;
```
*/
lazy_static! {
// Database schema migrations for the Keep application.
//
// Defines the sequence of migrations to create and update the schema.
// Applied automatically when opening a database connection.
static ref MIGRATIONS: Migrations<'static> = Migrations::new(vec![
static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
Migrations::new(vec![
M::up(
"CREATE TABLE items(
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
ts TEXT NOT NULL,
size INTEGER NULL,
compression TEXT NOT NULL)"
compression TEXT NOT NULL)",
),
M::up(
"CREATE TABLE tags (
id INTEGER NOT NULL,
name TEXT NOT NULL,
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
PRIMARY KEY(id, name));"
PRIMARY KEY(id, name));",
),
M::up(
"CREATE TABLE metas (
@@ -73,16 +68,17 @@ lazy_static! {
name TEXT NOT NULL,
value TEXT NOT NULL,
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
PRIMARY KEY(id, name));"
PRIMARY KEY(id, name));",
),
M::up("CREATE INDEX idx_tags_name ON tags(name)"),
M::up("CREATE INDEX idx_metas_name ON metas(name)"),
M::up("CREATE INDEX idx_items_ts ON items(ts)"),
M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"),
M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"),
M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"),
M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"),
]);
}
])
});
/// Represents an item stored in the database.
///

View File

@@ -213,6 +213,44 @@ pub enum FilterType {
/// Prevents OOM on large files by rejecting inputs that exceed this limit.
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;
struct BoundedVecWriter {
data: Vec<u8>,
limit: usize,
}
impl BoundedVecWriter {
fn new(limit: usize) -> Self {
Self {
data: Vec::new(),
limit,
}
}
fn into_inner(self) -> Vec<u8> {
self.data
}
}
impl std::io::Write for BoundedVecWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if self.data.len() + buf.len() > self.limit {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Input size exceeds maximum filter buffer size ({} bytes)",
MAX_FILTER_BUFFER_SIZE
),
));
}
self.data.write_all(buf)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
/// A chain of filter plugins applied sequentially.
///
/// Chains multiple filters, applying them in order to the input stream.
@@ -360,21 +398,10 @@ impl FilterChain {
}
// For multiple plugins, we need to chain them together
// We'll use a temporary buffer to hold intermediate results
let mut current_data = Vec::new();
std::io::copy(reader, &mut current_data)?;
if current_data.len() > MAX_FILTER_BUFFER_SIZE {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \
Consider using fewer filter plugins or smaller inputs.",
current_data.len(),
MAX_FILTER_BUFFER_SIZE
),
));
}
// We'll use a bounded buffer to hold intermediate results
let mut bounded_writer = BoundedVecWriter::new(MAX_FILTER_BUFFER_SIZE);
std::io::copy(reader, &mut bounded_writer)?;
let mut current_data = bounded_writer.into_inner();
// Store the plugins length to avoid borrowing issues
let plugins_len = self.plugins.len();

View File

@@ -1,5 +1,4 @@
use log::{debug, warn};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
@@ -444,9 +443,9 @@ where
///
/// An empty `HashMap` (default implementation).
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
use once_cell::sync::Lazy;
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
Lazy::new(std::collections::HashMap::new);
use std::sync::LazyLock;
static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
LazyLock::new(std::collections::HashMap::new);
&EMPTY
}
@@ -471,9 +470,9 @@ where
///
/// An empty `HashMap` (default implementation).
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
use once_cell::sync::Lazy;
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
Lazy::new(std::collections::HashMap::new);
use std::sync::LazyLock;
static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
LazyLock::new(std::collections::HashMap::new);
&EMPTY
}
@@ -602,8 +601,9 @@ where
}
/// Global registry for meta plugins.
static META_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<MetaPluginType, PluginConstructor>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
static META_PLUGIN_REGISTRY: std::sync::LazyLock<
Mutex<HashMap<MetaPluginType, PluginConstructor>>,
> = std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
/// Register a meta plugin with the global registry.
///

View File

@@ -21,7 +21,6 @@ use chrono::{DateTime, Utc};
use clap::Command;
use clap::error::ErrorKind;
use comfy_table::{Attribute, Cell, ContentArrangement, Table};
use lazy_static::lazy_static;
use log::debug;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -57,9 +56,8 @@ pub enum OutputFormat {
Yaml,
}
lazy_static! {
static ref KEEP_META_RE: Regex = Regex::new(r"^KEEP_META_(.+)$").unwrap();
}
static KEEP_META_RE: std::sync::LazyLock<Regex> =
std::sync::LazyLock::new(|| Regex::new(r"^KEEP_META_(.+)$").unwrap());
pub const IMPORT_FORMAT_ERROR: &str =
"Unsupported import format: {} (expected .keep.tar or .meta.yml)";

View File

@@ -13,11 +13,13 @@ use serde::Deserialize;
use std::collections::HashMap;
/// Escape text content for safe HTML insertion.
#[inline]
fn esc(s: &str) -> String {
encode_text(s).to_string()
}
/// Escape attribute values for safe HTML attribute insertion.
#[inline]
fn esc_attr(s: &str) -> String {
encode_double_quoted_attribute(s).to_string()
}

View File

@@ -1,5 +1,4 @@
use crate::filter_plugin::{FilterChain, parse_filter_string};
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::io::{Read, Result, Write};
use std::sync::Mutex;
@@ -166,8 +165,8 @@ impl FilterService {
/// # Panics
///
/// Lock acquisition failures (rare) cause panics in accessors.
static FILTER_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<String, FilterConstructor>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
static FILTER_PLUGIN_REGISTRY: std::sync::LazyLock<Mutex<HashMap<String, FilterConstructor>>> =
std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
/// Registers a filter plugin in the global registry.
///

View File

@@ -1,3 +1,8 @@
/// Business logic services for the Keep application.
///
/// This module provides the core service layer that orchestrates item storage,
/// compression, metadata collection, and filtering. Services are used by both
/// local CLI modes and the HTTP server.
pub mod compression_service;
pub mod error;
pub mod filter_service;

View File

@@ -1,5 +1,4 @@
use anyhow::{Result, bail};
use once_cell::sync::Lazy;
/// Supported LLM token encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
@@ -48,10 +47,10 @@ impl std::fmt::Debug for Tokenizer {
}
/// Static tokenizer instances — loaded once per process, shared across all plugins.
static CL100K: Lazy<Tokenizer> = Lazy::new(|| {
static CL100K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer")
});
static O200K: Lazy<Tokenizer> = Lazy::new(|| {
static O200K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer")
});