Compare commits

...

3 Commits

Author SHA1 Message Date
e2cb36d2a8 feat(server): add file_size to API ItemInfo response 2026-03-21 14:03:58 -03:00
0004324301 perf: pre-allocate status info collections with known capacities 2026-03-21 13:54:37 -03:00
b3edfe7de6 chore: code review cleanup — fixes, deps, docs
Fixed:
- CLI help typo: "metatdata" -> "metadata"
- Filter buffer OOM: check size before loading into memory

Changed:
- #[inline] on HTML escape helpers for hot path performance
- Replaced once_cell and lazy_static with std::sync::LazyLock
- Removed unused once_cell and lazy_static crate dependencies

Refactored:
- Added module-level doc to services/ module

Documentation:
- README.md: zstd is native (not external); "none" -> "raw"
- DESIGN.md: current schema and meta plugins section
- CHANGELOG.md: Unreleased section populated
2026-03-21 11:44:37 -03:00
18 changed files with 242 additions and 154 deletions

View File

@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Added
- Database index on `items(ts)` column for faster ORDER BY sorting
- Server API `ItemInfo` now includes `file_size` — actual filesystem-reported size of the item data file
### Changed
- Filter plugins check size before loading content into memory (prevents OOM on large inputs)
- Status page pre-allocates collections with known capacities (meta plugins, compression info)
- `#[inline]` on HTML escape helper functions (`esc`, `esc_attr`) for hot path performance
- Removed `once_cell` crate (replaced with `std::sync::LazyLock` from Rust 1.80)
- Removed `lazy_static` crate (replaced with `std::sync::LazyLock`)
### Fixed
- CLI help text typo: "metatdata" → "metadata" in `--get` and `--info` descriptions
### Refactored
- Added module-level documentation to `services/` module
### Documentation
- README.md: Fixed compression table — zstd is native (not external), "none" renamed to "raw"
- DESIGN.md: Updated schema to reflect current `items` table columns and meta plugin inventory
## [0.1.0] - 2026-03-21 ## [0.1.0] - 2026-03-21
### Added ### Added

2
Cargo.lock generated
View File

@@ -1727,7 +1727,6 @@ dependencies = [
"inventory", "inventory",
"is-terminal", "is-terminal",
"jsonwebtoken", "jsonwebtoken",
"lazy_static",
"libc", "libc",
"local-ip-address", "local-ip-address",
"log", "log",
@@ -1735,7 +1734,6 @@ dependencies = [
"magic", "magic",
"md5", "md5",
"nix", "nix",
"once_cell",
"os_pipe", "os_pipe",
"pest", "pest",
"pest_derive", "pest_derive",

View File

@@ -35,7 +35,6 @@ hyper = { version = "1.0", features = ["full"] }
http-body-util = "0.1" http-body-util = "0.1"
inventory = "0.3" inventory = "0.3"
is-terminal = "0.4" is-terminal = "0.4"
lazy_static = "1.5"
libc = "0.2" libc = "0.2"
local-ip-address = "0.6" local-ip-address = "0.6"
log = "0.4" log = "0.4"
@@ -45,7 +44,6 @@ magic = { version = "0.13", optional = true }
infer = { version = "0.19", optional = true } infer = { version = "0.19", optional = true }
tree_magic_mini = { version = "3.2", optional = true } tree_magic_mini = { version = "3.2", optional = true }
nix = { version = "0.30", features = ["fs", "process"] } nix = { version = "0.30", features = ["fs", "process"] }
once_cell = "1.21"
comfy-table = "7.2" comfy-table = "7.2"
pwhash = "1.0" pwhash = "1.0"
regex = "1.10" regex = "1.10"

View File

@@ -117,7 +117,7 @@
## Data Storage ## Data Storage
### Database Schema ### Database Schema
- `items` table: id (primary key), ts (timestamp), size (optional), compression - `items` table: id (primary key), ts (timestamp), uncompressed_size (optional), compressed_size (optional), closed (boolean), compression
- `tags` table: id (foreign key to items), name (tag name) - `tags` table: id (foreign key to items), name (tag name)
- `metas` table: id (foreign key to items), name (meta key), value (meta value) - `metas` table: id (foreign key to items), name (meta key), value (meta value)
- Indexes on tag names and meta names for faster queries - Indexes on tag names and meta names for faster queries
@@ -178,26 +178,25 @@
- None (no compression) - None (no compression)
## Supported Meta Plugins ## Supported Meta Plugins
- FileMagic - File type detection using file command
- FileMime - MIME type detection using file command Meta plugins collect metadata during item save. Each plugin produces one or more key-value pairs:
- FileEncoding - File encoding detection using file command
- LineCount - Line count using wc command - `magic_file` - File type detection using libmagic (when `magic` feature enabled)
- WordCount - Word count using wc command - `infer` - MIME type detection using infer crate (when `infer` feature enabled)
- Cwd - Current working directory - `tree_magic_mini` - MIME type detection using tree_magic_mini (when `tree_magic_mini` feature enabled)
- Binary - Binary file detection - `tokens` - LLM token counting using tiktoken (when `tokens` feature enabled)
- Uid - Current user ID - `text` - Text analysis: line count, word count, char count, line average length
- User - Current username - `digest` - SHA-256 and MD5 checksums
- Gid - Current group ID - `hostname` - System hostname (full and short)
- Group - Current group name - `cwd` - Current working directory
- Shell - Shell path from SHELL environment variable - `user` - Current username and UID
- ShellPid - Shell process ID from PPID environment variable - `shell` - Shell path from SHELL environment variable
- KeepPid - Keep process ID - `shell_pid` - Shell process ID from PPID
- DigestSha256 - SHA-256 digest - `keep_pid` - Keep process ID
- DigestMd5 - MD5 digest using md5sum command - `env` - Arbitrary environment variables (via `KEEP_META_ENV_*` prefix)
- ReadTime - Time taken to read data - `exec` - Execute external commands for custom metadata
- ReadRate - Rate of data reading - `read_time` - Time taken to read content
- Hostname - System hostname - `read_rate` - Content read rate (bytes/second)
- FullHostname - Fully qualified domain name
## Testing Strategy ## Testing Strategy
- Unit tests for each module in `src/tests/` - Unit tests for each module in `src/tests/`

View File

@@ -345,8 +345,8 @@ Items are compressed automatically on save. Default: LZ4.
| `gzip` | Internal | Fast | Good | | `gzip` | Internal | Fast | Good |
| `bzip2` | External | Slow | Better | | `bzip2` | External | Slow | Better |
| `xz` | External | Slowest | Best | | `xz` | External | Slowest | Best |
| `zstd` | External | Fast | Good | | `zstd` | Internal | Fast | Good |
| `none` | Internal | N/A | N/A | | `raw` | Internal | N/A | N/A |
```sh ```sh
# Specify compression per item # Specify compression per item

View File

@@ -29,9 +29,7 @@ pub struct ModeArgs {
pub save: bool, pub save: bool,
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))] #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))]
#[arg(help( #[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
"Get an item either by it's ID or by a combination of matching tags and metatdata"
))]
pub get: bool, pub get: bool,
#[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))] #[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))]
@@ -48,9 +46,7 @@ pub struct ModeArgs {
pub delete: bool, pub delete: bool,
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))] #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))]
#[arg(help( #[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
"Get an item either by it's ID or by a combination of matching tags and metatdata"
))]
pub info: bool, pub info: bool,
#[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))] #[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))]

View File

@@ -89,7 +89,7 @@ pub fn generate_status_info(
}; };
let _default_type = crate::compression_engine::default_compression_type(); let _default_type = crate::compression_engine::default_compression_type();
let mut compression_info = Vec::new(); let mut compression_info = Vec::with_capacity(CompressionType::iter().count());
// Sort compression types by their string representation // Sort compression types by their string representation
let mut sorted_compression_types: Vec<CompressionType> = CompressionType::iter().collect(); let mut sorted_compression_types: Vec<CompressionType> = CompressionType::iter().collect();
@@ -141,7 +141,8 @@ pub fn generate_status_info(
}); });
} }
let mut meta_plugins_map = std::collections::HashMap::new(); let mut meta_plugins_map =
std::collections::HashMap::with_capacity(MetaPluginType::iter().count());
let mut enabled_meta_plugins_vec = Vec::new(); let mut enabled_meta_plugins_vec = Vec::new();
// Sort meta plugin types by their string representation to avoid creating plugins just for sorting // Sort meta plugin types by their string representation to avoid creating plugins just for sorting

View File

@@ -7,8 +7,6 @@ use strum::{Display, EnumIter, EnumString};
use log::*; use log::*;
use lazy_static::lazy_static;
extern crate enum_map; extern crate enum_map;
use enum_map::enum_map; use enum_map::enum_map;
use enum_map::{Enum, EnumMap}; use enum_map::{Enum, EnumMap};
@@ -180,10 +178,9 @@ impl Clone for Box<dyn CompressionEngine> {
} }
} }
lazy_static! { fn init_compression_engines() -> EnumMap<CompressionType, Box<dyn CompressionEngine>> {
static ref COMPRESSION_ENGINES: EnumMap<CompressionType, Box<dyn CompressionEngine>> = { #[allow(unused_mut)]
#[allow(unused_mut)] // mut needed when gzip/lz4 features are enabled let mut em: EnumMap<CompressionType, Box<dyn CompressionEngine>> = enum_map! {
let mut em = enum_map! {
CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new( CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
"lz4", "lz4",
vec!["-c"], vec!["-c"],
@@ -234,9 +231,12 @@ lazy_static! {
} }
em em
};
} }
static COMPRESSION_ENGINES: std::sync::LazyLock<
EnumMap<CompressionType, Box<dyn CompressionEngine>>,
> = std::sync::LazyLock::new(init_compression_engines);
pub fn default_compression_type() -> CompressionType { pub fn default_compression_type() -> CompressionType {
CompressionType::LZ4 CompressionType::LZ4
} }

View File

@@ -1,6 +1,5 @@
use anyhow::{Context, Error, Result, anyhow}; use anyhow::{Context, Error, Result, anyhow};
use chrono::prelude::*; use chrono::prelude::*;
use lazy_static::lazy_static;
use log::*; use log::*;
use rusqlite::{Connection, OpenFlags, Row, params}; use rusqlite::{Connection, OpenFlags, Row, params};
use rusqlite_migration::{M, Migrations}; use rusqlite_migration::{M, Migrations};
@@ -47,25 +46,21 @@ let id = db::insert_item(&conn, item)?;
``` ```
*/ */
lazy_static! { static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
// Database schema migrations for the Keep application. Migrations::new(vec![
//
// Defines the sequence of migrations to create and update the schema.
// Applied automatically when opening a database connection.
static ref MIGRATIONS: Migrations<'static> = Migrations::new(vec![
M::up( M::up(
"CREATE TABLE items( "CREATE TABLE items(
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
ts TEXT NOT NULL, ts TEXT NOT NULL,
size INTEGER NULL, size INTEGER NULL,
compression TEXT NOT NULL)" compression TEXT NOT NULL)",
), ),
M::up( M::up(
"CREATE TABLE tags ( "CREATE TABLE tags (
id INTEGER NOT NULL, id INTEGER NOT NULL,
name TEXT NOT NULL, name TEXT NOT NULL,
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE, FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
PRIMARY KEY(id, name));" PRIMARY KEY(id, name));",
), ),
M::up( M::up(
"CREATE TABLE metas ( "CREATE TABLE metas (
@@ -73,16 +68,17 @@ lazy_static! {
name TEXT NOT NULL, name TEXT NOT NULL,
value TEXT NOT NULL, value TEXT NOT NULL,
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE, FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
PRIMARY KEY(id, name));" PRIMARY KEY(id, name));",
), ),
M::up("CREATE INDEX idx_tags_name ON tags(name)"), M::up("CREATE INDEX idx_tags_name ON tags(name)"),
M::up("CREATE INDEX idx_metas_name ON metas(name)"), M::up("CREATE INDEX idx_metas_name ON metas(name)"),
M::up("CREATE INDEX idx_items_ts ON items(ts)"),
M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"), M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"),
M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"), M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"),
M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"), M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"),
M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"), M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"),
]); ])
} });
/// Represents an item stored in the database. /// Represents an item stored in the database.
/// ///

View File

@@ -213,6 +213,44 @@ pub enum FilterType {
/// Prevents OOM on large files by rejecting inputs that exceed this limit. /// Prevents OOM on large files by rejecting inputs that exceed this limit.
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024; const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;
struct BoundedVecWriter {
data: Vec<u8>,
limit: usize,
}
impl BoundedVecWriter {
fn new(limit: usize) -> Self {
Self {
data: Vec::new(),
limit,
}
}
fn into_inner(self) -> Vec<u8> {
self.data
}
}
impl std::io::Write for BoundedVecWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if self.data.len() + buf.len() > self.limit {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Input size exceeds maximum filter buffer size ({} bytes)",
MAX_FILTER_BUFFER_SIZE
),
));
}
self.data.write_all(buf)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
/// A chain of filter plugins applied sequentially. /// A chain of filter plugins applied sequentially.
/// ///
/// Chains multiple filters, applying them in order to the input stream. /// Chains multiple filters, applying them in order to the input stream.
@@ -360,21 +398,10 @@ impl FilterChain {
} }
// For multiple plugins, we need to chain them together // For multiple plugins, we need to chain them together
// We'll use a temporary buffer to hold intermediate results // We'll use a bounded buffer to hold intermediate results
let mut current_data = Vec::new(); let mut bounded_writer = BoundedVecWriter::new(MAX_FILTER_BUFFER_SIZE);
std::io::copy(reader, &mut current_data)?; std::io::copy(reader, &mut bounded_writer)?;
let mut current_data = bounded_writer.into_inner();
if current_data.len() > MAX_FILTER_BUFFER_SIZE {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!(
"Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \
Consider using fewer filter plugins or smaller inputs.",
current_data.len(),
MAX_FILTER_BUFFER_SIZE
),
));
}
// Store the plugins length to avoid borrowing issues // Store the plugins length to avoid borrowing issues
let plugins_len = self.plugins.len(); let plugins_len = self.plugins.len();

View File

@@ -1,5 +1,4 @@
use log::{debug, warn}; use log::{debug, warn};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
@@ -444,9 +443,9 @@ where
/// ///
/// An empty `HashMap` (default implementation). /// An empty `HashMap` (default implementation).
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
use once_cell::sync::Lazy; use std::sync::LazyLock;
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> = static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
Lazy::new(std::collections::HashMap::new); LazyLock::new(std::collections::HashMap::new);
&EMPTY &EMPTY
} }
@@ -471,9 +470,9 @@ where
/// ///
/// An empty `HashMap` (default implementation). /// An empty `HashMap` (default implementation).
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> { fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
use once_cell::sync::Lazy; use std::sync::LazyLock;
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> = static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
Lazy::new(std::collections::HashMap::new); LazyLock::new(std::collections::HashMap::new);
&EMPTY &EMPTY
} }
@@ -602,8 +601,9 @@ where
} }
/// Global registry for meta plugins. /// Global registry for meta plugins.
static META_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<MetaPluginType, PluginConstructor>>> = static META_PLUGIN_REGISTRY: std::sync::LazyLock<
Lazy::new(|| Mutex::new(HashMap::new())); Mutex<HashMap<MetaPluginType, PluginConstructor>>,
> = std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
/// Register a meta plugin with the global registry. /// Register a meta plugin with the global registry.
/// ///

View File

@@ -21,7 +21,6 @@ use chrono::{DateTime, Utc};
use clap::Command; use clap::Command;
use clap::error::ErrorKind; use clap::error::ErrorKind;
use comfy_table::{Attribute, Cell, ContentArrangement, Table}; use comfy_table::{Attribute, Cell, ContentArrangement, Table};
use lazy_static::lazy_static;
use log::debug; use log::debug;
use regex::Regex; use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -57,9 +56,8 @@ pub enum OutputFormat {
Yaml, Yaml,
} }
lazy_static! { static KEEP_META_RE: std::sync::LazyLock<Regex> =
static ref KEEP_META_RE: Regex = Regex::new(r"^KEEP_META_(.+)$").unwrap(); std::sync::LazyLock::new(|| Regex::new(r"^KEEP_META_(.+)$").unwrap());
}
pub const IMPORT_FORMAT_ERROR: &str = pub const IMPORT_FORMAT_ERROR: &str =
"Unsupported import format: {} (expected .keep.tar or .meta.yml)"; "Unsupported import format: {} (expected .keep.tar or .meta.yml)";

View File

@@ -178,6 +178,7 @@ pub async fn handle_list_items(
let item_infos: Vec<ItemInfo> = items_with_meta let item_infos: Vec<ItemInfo> = items_with_meta
.into_iter() .into_iter()
.filter_map(|iwm| ItemInfo::try_from(iwm).ok()) .filter_map(|iwm| ItemInfo::try_from(iwm).ok())
.map(|info| info.with_file_size(&state.data_dir))
.collect(); .collect();
ResponseBuilder::json(ApiResponse::ok(item_infos)) ResponseBuilder::json(ApiResponse::ok(item_infos))
@@ -339,6 +340,7 @@ pub async fn handle_post_item(
let db = state.db.clone(); let db = state.db.clone();
let item_service = state.item_service.clone(); let item_service = state.item_service.clone();
let settings = state.settings.clone(); let settings = state.settings.clone();
let data_dir = state.data_dir.clone();
// Parse tags from query parameter // Parse tags from query parameter
let tags: Vec<String> = params let tags: Vec<String> = params
@@ -472,7 +474,9 @@ pub async fn handle_post_item(
return Err(StatusCode::PAYLOAD_TOO_LARGE); return Err(StatusCode::PAYLOAD_TOO_LARGE);
} }
let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| { let item_info = ItemInfo::try_from(item_with_meta)
.map(|info| info.with_file_size(&data_dir))
.map_err(|e| {
warn!("Item conversion failed: {e}"); warn!("Item conversion failed: {e}");
StatusCode::INTERNAL_SERVER_ERROR StatusCode::INTERNAL_SERVER_ERROR
})?; })?;
@@ -1092,6 +1096,7 @@ pub async fn handle_delete_item(
compression: deleted_item.compression, compression: deleted_item.compression,
tags: vec![], tags: vec![],
metadata: HashMap::new(), metadata: HashMap::new(),
file_size: None,
}; };
Ok(Json(ApiResponse::ok(item_info))) Ok(Json(ApiResponse::ok(item_info)))
@@ -1124,6 +1129,7 @@ pub async fn handle_get_item_info(
let db = state.db.clone(); let db = state.db.clone();
let item_service = state.item_service.clone(); let item_service = state.item_service.clone();
let data_dir = state.data_dir.clone();
let item_with_meta = task::spawn_blocking(move || { let item_with_meta = task::spawn_blocking(move || {
let conn = db.blocking_lock(); let conn = db.blocking_lock();
@@ -1136,7 +1142,9 @@ pub async fn handle_get_item_info(
})? })?
.map_err(handle_item_error)?; .map_err(handle_item_error)?;
let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| { let item_info = ItemInfo::try_from(item_with_meta)
.map(|info| info.with_file_size(&data_dir))
.map_err(|e| {
warn!("Item conversion failed: {e}"); warn!("Item conversion failed: {e}");
StatusCode::INTERNAL_SERVER_ERROR StatusCode::INTERNAL_SERVER_ERROR
})?; })?;
@@ -1352,6 +1360,7 @@ pub async fn handle_update_item(
let db = state.db.clone(); let db = state.db.clone();
let item_service = state.item_service.clone(); let item_service = state.item_service.clone();
let settings = state.settings.clone(); let settings = state.settings.clone();
let data_dir = state.data_dir.clone();
let size_param = params.uncompressed_size; let size_param = params.uncompressed_size;
let item_info = task::spawn_blocking(move || { let item_info = task::spawn_blocking(move || {
@@ -1369,7 +1378,9 @@ pub async fn handle_update_item(
return Err(StatusCode::INTERNAL_SERVER_ERROR); return Err(StatusCode::INTERNAL_SERVER_ERROR);
} }
match item_service.get_item(&conn, item_id) { match item_service.get_item(&conn, item_id) {
Ok(iwm) => ItemInfo::try_from(iwm).map_err(|e| { Ok(iwm) => ItemInfo::try_from(iwm)
.map(|info| info.with_file_size(&data_dir))
.map_err(|e| {
warn!("Item conversion failed: {e}"); warn!("Item conversion failed: {e}");
StatusCode::INTERNAL_SERVER_ERROR StatusCode::INTERNAL_SERVER_ERROR
}), }),
@@ -1392,7 +1403,9 @@ pub async fn handle_update_item(
); );
match result { match result {
Ok(item_with_meta) => ItemInfo::try_from(item_with_meta).map_err(|e| { Ok(item_with_meta) => ItemInfo::try_from(item_with_meta)
.map(|info| info.with_file_size(&data_dir))
.map_err(|e| {
warn!("Item conversion failed: {e}"); warn!("Item conversion failed: {e}");
StatusCode::INTERNAL_SERVER_ERROR StatusCode::INTERNAL_SERVER_ERROR
}), }),

View File

@@ -366,10 +366,13 @@ pub struct StatusInfoResponse {
/// let item_info = ItemInfo { /// let item_info = ItemInfo {
/// id: 42, /// id: 42,
/// ts: "2023-12-01T15:30:45Z".to_string(), /// ts: "2023-12-01T15:30:45Z".to_string(),
/// size: Some(1024), /// uncompressed_size: Some(1024),
/// compressed_size: Some(512),
/// closed: true,
/// compression: "gzip".to_string(), /// compression: "gzip".to_string(),
/// tags: vec!["important".to_string()], /// tags: vec!["important".to_string()],
/// metadata: HashMap::from([("mime_type".to_string(), "text/plain".to_string())]), /// metadata: HashMap::from([("mime_type".to_string(), "text/plain".to_string())]),
/// file_size: Some(512),
/// }; /// };
/// ``` /// ```
#[derive(Serialize, Deserialize, ToSchema)] #[derive(Serialize, Deserialize, ToSchema)]
@@ -413,6 +416,33 @@ pub struct ItemInfo {
/// Key-value pairs containing additional metadata about the item. /// Key-value pairs containing additional metadata about the item.
#[schema(example = json!({"mime_type": "text/plain", "mime_encoding": "utf-8", "line_count": "42"}))] #[schema(example = json!({"mime_type": "text/plain", "mime_encoding": "utf-8", "line_count": "42"}))]
pub metadata: HashMap<String, String>, pub metadata: HashMap<String, String>,
/// Actual file size in bytes.
///
/// The filesystem-reported size of the item's data file. This may differ from
/// `compressed_size` if the file was written and the database hasn't been updated.
/// None if the file cannot be read (e.g., file not found, permission denied).
#[schema(example = 512)]
pub file_size: Option<i64>,
}
impl ItemInfo {
/// Enriches this `ItemInfo` with the actual filesystem-reported size.
///
/// Reads the size of the item's data file from disk and sets `file_size`.
/// If the file cannot be read, `file_size` is left as None.
///
/// # Arguments
///
/// * `data_dir` - The data directory path containing item files.
///
/// # Returns
///
/// A new `ItemInfo` with `file_size` populated from the filesystem.
pub fn with_file_size(mut self, data_dir: &std::path::Path) -> Self {
let item_path = data_dir.join(self.id.to_string());
self.file_size = std::fs::metadata(&item_path).map(|m| m.len() as i64).ok();
self
}
} }
impl TryFrom<ItemWithMeta> for ItemInfo { impl TryFrom<ItemWithMeta> for ItemInfo {
@@ -433,6 +463,7 @@ impl TryFrom<ItemWithMeta> for ItemInfo {
compression: item_with_meta.item.compression, compression: item_with_meta.item.compression,
tags, tags,
metadata, metadata,
file_size: None,
}) })
} }
} }

View File

@@ -13,11 +13,13 @@ use serde::Deserialize;
use std::collections::HashMap; use std::collections::HashMap;
/// Escape text content for safe HTML insertion. /// Escape text content for safe HTML insertion.
#[inline]
fn esc(s: &str) -> String { fn esc(s: &str) -> String {
encode_text(s).to_string() encode_text(s).to_string()
} }
/// Escape attribute values for safe HTML attribute insertion. /// Escape attribute values for safe HTML attribute insertion.
#[inline]
fn esc_attr(s: &str) -> String { fn esc_attr(s: &str) -> String {
encode_double_quoted_attribute(s).to_string() encode_double_quoted_attribute(s).to_string()
} }

View File

@@ -1,5 +1,4 @@
use crate::filter_plugin::{FilterChain, parse_filter_string}; use crate::filter_plugin::{FilterChain, parse_filter_string};
use once_cell::sync::Lazy;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::{Read, Result, Write}; use std::io::{Read, Result, Write};
use std::sync::Mutex; use std::sync::Mutex;
@@ -166,8 +165,8 @@ impl FilterService {
/// # Panics /// # Panics
/// ///
/// Lock acquisition failures (rare) cause panics in accessors. /// Lock acquisition failures (rare) cause panics in accessors.
static FILTER_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<String, FilterConstructor>>> = static FILTER_PLUGIN_REGISTRY: std::sync::LazyLock<Mutex<HashMap<String, FilterConstructor>>> =
Lazy::new(|| Mutex::new(HashMap::new())); std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
/// Registers a filter plugin in the global registry. /// Registers a filter plugin in the global registry.
/// ///

View File

@@ -1,3 +1,8 @@
/// Business logic services for the Keep application.
///
/// This module provides the core service layer that orchestrates item storage,
/// compression, metadata collection, and filtering. Services are used by both
/// local CLI modes and the HTTP server.
pub mod compression_service; pub mod compression_service;
pub mod error; pub mod error;
pub mod filter_service; pub mod filter_service;

View File

@@ -1,5 +1,4 @@
use anyhow::{Result, bail}; use anyhow::{Result, bail};
use once_cell::sync::Lazy;
/// Supported LLM token encodings. /// Supported LLM token encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
@@ -48,10 +47,10 @@ impl std::fmt::Debug for Tokenizer {
} }
/// Static tokenizer instances — loaded once per process, shared across all plugins. /// Static tokenizer instances — loaded once per process, shared across all plugins.
static CL100K: Lazy<Tokenizer> = Lazy::new(|| { static CL100K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer") Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer")
}); });
static O200K: Lazy<Tokenizer> = Lazy::new(|| { static O200K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer") Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer")
}); });