Compare commits
3 Commits
ab2fb07505 ... e2cb36d2a8

| Author | SHA1 | Date |
|---|---|---|
| | e2cb36d2a8 | |
| | 0004324301 | |
| | b3edfe7de6 | |

26 CHANGELOG.md
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Database index on `items(ts)` column for faster ORDER BY sorting
- Server API `ItemInfo` now includes `file_size` — actual filesystem-reported size of the item data file

### Changed

- Filter plugins check size before loading content into memory (prevents OOM on large inputs)
- Status page pre-allocates collections with known capacities (meta plugins, compression info)
- `#[inline]` on HTML escape helper functions (`esc`, `esc_attr`) for hot path performance
- Removed `once_cell` crate (replaced with `std::sync::LazyLock` from Rust 1.80)
- Removed `lazy_static` crate (replaced with `std::sync::LazyLock`)

### Fixed

- CLI help text typo: "metatdata" → "metadata" in `--get` and `--info` descriptions

### Refactored

- Added module-level documentation to `services/` module

### Documentation

- README.md: Fixed compression table — zstd is native (not external), "none" renamed to "raw"
- DESIGN.md: Updated schema to reflect current `items` table columns and meta plugin inventory

## [0.1.0] - 2026-03-21

### Added
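The two "Removed" entries above describe the same mechanical change: statics previously built with `once_cell::sync::Lazy` or `lazy_static!` now use the standard library's `std::sync::LazyLock` (stable since Rust 1.80). A minimal sketch of the pattern, with hypothetical names rather than the project's own statics:

```rust
use std::collections::HashMap;
use std::sync::LazyLock;

// Before: `static TABLE: Lazy<...> = Lazy::new(build);` (once_cell)
// or a `lazy_static! { static ref TABLE: ... = build(); }` block.
// After: the same lazy, thread-safe initialization with no extra crate.
static TABLE: LazyLock<HashMap<&'static str, u32>> = LazyLock::new(|| {
    // Runs once, on first access; later reads share the initialized value.
    HashMap::from([("lz4", 1), ("gzip", 2)])
});

fn main() {
    println!("{:?}", TABLE.get("lz4"));
}
```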
2 Cargo.lock generated
@@ -1727,7 +1727,6 @@ dependencies = [
 "inventory",
 "is-terminal",
 "jsonwebtoken",
 "lazy_static",
 "libc",
 "local-ip-address",
 "log",
@@ -1735,7 +1734,6 @@ dependencies = [
 "magic",
 "md5",
 "nix",
 "once_cell",
 "os_pipe",
 "pest",
 "pest_derive",

@@ -35,7 +35,6 @@ hyper = { version = "1.0", features = ["full"] }
http-body-util = "0.1"
inventory = "0.3"
is-terminal = "0.4"
lazy_static = "1.5"
libc = "0.2"
local-ip-address = "0.6"
log = "0.4"
@@ -45,7 +44,6 @@ magic = { version = "0.13", optional = true }
infer = { version = "0.19", optional = true }
tree_magic_mini = { version = "3.2", optional = true }
nix = { version = "0.30", features = ["fs", "process"] }
once_cell = "1.21"
comfy-table = "7.2"
pwhash = "1.0"
regex = "1.10"
41 DESIGN.md
@@ -117,7 +117,7 @@
## Data Storage

### Database Schema
- `items` table: id (primary key), ts (timestamp), size (optional), compression
- `items` table: id (primary key), ts (timestamp), uncompressed_size (optional), compressed_size (optional), closed (boolean), compression
- `tags` table: id (foreign key to items), name (tag name)
- `metas` table: id (foreign key to items), name (meta key), value (meta value)
- Indexes on tag names and meta names for faster queries

@@ -178,26 +178,25 @@
- None (no compression)

## Supported Meta Plugins
- FileMagic - File type detection using file command
- FileMime - MIME type detection using file command
- FileEncoding - File encoding detection using file command
- LineCount - Line count using wc command
- WordCount - Word count using wc command
- Cwd - Current working directory
- Binary - Binary file detection
- Uid - Current user ID
- User - Current username
- Gid - Current group ID
- Group - Current group name
- Shell - Shell path from SHELL environment variable
- ShellPid - Shell process ID from PPID environment variable
- KeepPid - Keep process ID
- DigestSha256 - SHA-256 digest
- DigestMd5 - MD5 digest using md5sum command
- ReadTime - Time taken to read data
- ReadRate - Rate of data reading
- Hostname - System hostname
- FullHostname - Fully qualified domain name

Meta plugins collect metadata during item save. Each plugin produces one or more key-value pairs:

- `magic_file` - File type detection using libmagic (when `magic` feature enabled)
- `infer` - MIME type detection using infer crate (when `infer` feature enabled)
- `tree_magic_mini` - MIME type detection using tree_magic_mini (when `tree_magic_mini` feature enabled)
- `tokens` - LLM token counting using tiktoken (when `tokens` feature enabled)
- `text` - Text analysis: line count, word count, char count, line average length
- `digest` - SHA-256 and MD5 checksums
- `hostname` - System hostname (full and short)
- `cwd` - Current working directory
- `user` - Current username and UID
- `shell` - Shell path from SHELL environment variable
- `shell_pid` - Shell process ID from PPID
- `keep_pid` - Keep process ID
- `env` - Arbitrary environment variables (via `KEEP_META_ENV_*` prefix)
- `exec` - Execute external commands for custom metadata
- `read_time` - Time taken to read content
- `read_rate` - Content read rate (bytes/second)

## Testing Strategy
- Unit tests for each module in `src/tests/`

@@ -345,8 +345,8 @@ Items are compressed automatically on save. Default: LZ4.
| `gzip` | Internal | Fast | Good |
| `bzip2` | External | Slow | Better |
| `xz` | External | Slowest | Best |
| `zstd` | External | Fast | Good |
| `none` | Internal | N/A | N/A |
| `zstd` | Internal | Fast | Good |
| `raw` | Internal | N/A | N/A |

```sh
# Specify compression per item
```
@@ -29,9 +29,7 @@ pub struct ModeArgs {
    pub save: bool,

    #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))]
    #[arg(help(
        "Get an item either by it's ID or by a combination of matching tags and metatdata"
    ))]
    #[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
    pub get: bool,

    #[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))]
@@ -48,9 +46,7 @@ pub struct ModeArgs {
    pub delete: bool,

    #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))]
    #[arg(help(
        "Get an item either by it's ID or by a combination of matching tags and metatdata"
    ))]
    #[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
    pub info: bool,

    #[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))]
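The attribute changes above keep the clap derive style already used in this file. A hedged, self-contained sketch of that style, a boolean mode flag in an implicit `mode` group with a help heading and conflicts; the struct and flags here are illustrative, not the real CLI surface:

```rust
// Assumes the `clap` crate with the `derive` feature enabled.
use clap::Parser;

#[derive(Parser, Debug)]
struct Cli {
    /// Save an item (illustrative flag).
    #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["get"]))]
    save: bool,

    /// Get an item either by its ID or by a combination of matching tags and metadata.
    #[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save"]))]
    get: bool,
}

fn main() {
    let cli = Cli::parse();
    println!("save={} get={}", cli.save, cli.get);
}
```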
@@ -89,7 +89,7 @@ pub fn generate_status_info(
    };

    let _default_type = crate::compression_engine::default_compression_type();
    let mut compression_info = Vec::new();
    let mut compression_info = Vec::with_capacity(CompressionType::iter().count());

    // Sort compression types by their string representation
    let mut sorted_compression_types: Vec<CompressionType> = CompressionType::iter().collect();
@@ -141,7 +141,8 @@ pub fn generate_status_info(
        });
    }

    let mut meta_plugins_map = std::collections::HashMap::new();
    let mut meta_plugins_map =
        std::collections::HashMap::with_capacity(MetaPluginType::iter().count());
    let mut enabled_meta_plugins_vec = Vec::new();

    // Sort meta plugin types by their string representation to avoid creating plugins just for sorting
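Both pre-allocations rely on the variant count being known up front via strum's `EnumIter`. A hedged sketch of the idea with an illustrative enum (not the project's `CompressionType`):

```rust
// Assumes the `strum` crate with the `derive` feature.
use strum::{EnumIter, IntoEnumIterator};

#[derive(Debug, Clone, Copy, EnumIter)]
enum CompressionKind {
    Lz4,
    Gzip,
    Raw,
}

fn main() {
    // iter().count() equals the number of variants, so the Vec is sized exactly once.
    let mut rows: Vec<String> = Vec::with_capacity(CompressionKind::iter().count());
    for kind in CompressionKind::iter() {
        rows.push(format!("{kind:?}"));
    }
    assert!(rows.capacity() >= 3);
    println!("{rows:?}");
}
```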
@@ -7,8 +7,6 @@ use strum::{Display, EnumIter, EnumString};

use log::*;

use lazy_static::lazy_static;

extern crate enum_map;
use enum_map::enum_map;
use enum_map::{Enum, EnumMap};
@@ -180,63 +178,65 @@ impl Clone for Box<dyn CompressionEngine> {
    }
}

lazy_static! {
    static ref COMPRESSION_ENGINES: EnumMap<CompressionType, Box<dyn CompressionEngine>> = {
        #[allow(unused_mut)] // mut needed when gzip/lz4 features are enabled
        let mut em = enum_map! {
            CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
                "lz4",
                vec!["-c"],
                vec!["-d", "-c"]
            )) as Box<dyn CompressionEngine>,
            CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
                "gzip",
                vec!["-c"],
                vec!["-d", "-c"]
            )) as Box<dyn CompressionEngine>,
            CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
                "bzip2",
                vec!["-c"],
                vec!["-d", "-c"]
            )) as Box<dyn CompressionEngine>,
            CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
                "xz",
                vec!["-c"],
                vec!["-d", "-c"]
            )) as Box<dyn CompressionEngine>,
            CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
                "zstd",
                vec!["-c"],
                vec!["-d", "-c"]
            )) as Box<dyn CompressionEngine>,
            CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
        };

        #[cfg(feature = "gzip")]
        {
            em[CompressionType::GZip] =
                Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
                    as Box<dyn CompressionEngine>;
        }

        #[cfg(feature = "lz4")]
        {
            em[CompressionType::LZ4] =
                Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
                    as Box<dyn CompressionEngine>;
        }

        #[cfg(feature = "zstd")]
        {
            em[CompressionType::ZStd] =
                Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
                    as Box<dyn CompressionEngine>;
        }

        em
fn init_compression_engines() -> EnumMap<CompressionType, Box<dyn CompressionEngine>> {
    #[allow(unused_mut)]
    let mut em: EnumMap<CompressionType, Box<dyn CompressionEngine>> = enum_map! {
        CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
            "lz4",
            vec!["-c"],
            vec!["-d", "-c"]
        )) as Box<dyn CompressionEngine>,
        CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
            "gzip",
            vec!["-c"],
            vec!["-d", "-c"]
        )) as Box<dyn CompressionEngine>,
        CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
            "bzip2",
            vec!["-c"],
            vec!["-d", "-c"]
        )) as Box<dyn CompressionEngine>,
        CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
            "xz",
            vec!["-c"],
            vec!["-d", "-c"]
        )) as Box<dyn CompressionEngine>,
        CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
            "zstd",
            vec!["-c"],
            vec!["-d", "-c"]
        )) as Box<dyn CompressionEngine>,
        CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
    };

    #[cfg(feature = "gzip")]
    {
        em[CompressionType::GZip] =
            Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
                as Box<dyn CompressionEngine>;
    }

    #[cfg(feature = "lz4")]
    {
        em[CompressionType::LZ4] =
            Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
                as Box<dyn CompressionEngine>;
    }

    #[cfg(feature = "zstd")]
    {
        em[CompressionType::ZStd] =
            Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
                as Box<dyn CompressionEngine>;
    }

    em
}

static COMPRESSION_ENGINES: std::sync::LazyLock<
    EnumMap<CompressionType, Box<dyn CompressionEngine>>,
> = std::sync::LazyLock::new(init_compression_engines);

pub fn default_compression_type() -> CompressionType {
    CompressionType::LZ4
}
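The refactor above moves the table construction into a plain function and wraps it in `std::sync::LazyLock`. A hedged, self-contained sketch of the same shape, a lazily built `EnumMap` of boxed trait objects, with illustrative trait and enum names:

```rust
// Assumes the `enum_map` crate; the Codec trait and Kind enum are illustrative.
use enum_map::{enum_map, Enum, EnumMap};
use std::sync::LazyLock;

trait Codec: Send + Sync {
    fn name(&self) -> &'static str;
}

struct Passthrough;
impl Codec for Passthrough {
    fn name(&self) -> &'static str {
        "raw"
    }
}

#[derive(Debug, Clone, Copy, Enum)]
enum Kind {
    Lz4,
    Raw,
}

fn init_codecs() -> EnumMap<Kind, Box<dyn Codec>> {
    enum_map! {
        Kind::Lz4 => Box::new(Passthrough) as Box<dyn Codec>, // stand-in engine
        Kind::Raw => Box::new(Passthrough) as Box<dyn Codec>,
    }
}

// Built once, on first access, from a plain fn; no lazy_static!/once_cell needed.
static CODECS: LazyLock<EnumMap<Kind, Box<dyn Codec>>> = LazyLock::new(init_codecs);

fn main() {
    println!("{}", CODECS[Kind::Raw].name());
}
```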
20 src/db.rs
@@ -1,6 +1,5 @@
use anyhow::{Context, Error, Result, anyhow};
use chrono::prelude::*;
use lazy_static::lazy_static;
use log::*;
use rusqlite::{Connection, OpenFlags, Row, params};
use rusqlite_migration::{M, Migrations};
@@ -47,25 +46,21 @@ let id = db::insert_item(&conn, item)?;
```
*/

lazy_static! {
    // Database schema migrations for the Keep application.
    //
    // Defines the sequence of migrations to create and update the schema.
    // Applied automatically when opening a database connection.
    static ref MIGRATIONS: Migrations<'static> = Migrations::new(vec![
static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
    Migrations::new(vec![
        M::up(
            "CREATE TABLE items(
                id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                ts TEXT NOT NULL,
                size INTEGER NULL,
                compression TEXT NOT NULL)"
                compression TEXT NOT NULL)",
        ),
        M::up(
            "CREATE TABLE tags (
                id INTEGER NOT NULL,
                name TEXT NOT NULL,
                FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
                PRIMARY KEY(id, name));"
                PRIMARY KEY(id, name));",
        ),
        M::up(
            "CREATE TABLE metas (
@@ -73,16 +68,17 @@ lazy_static! {
                name TEXT NOT NULL,
                value TEXT NOT NULL,
                FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
                PRIMARY KEY(id, name));"
                PRIMARY KEY(id, name));",
        ),
        M::up("CREATE INDEX idx_tags_name ON tags(name)"),
        M::up("CREATE INDEX idx_metas_name ON metas(name)"),
        M::up("CREATE INDEX idx_items_ts ON items(ts)"),
        M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"),
        M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"),
        M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"),
        M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"),
    ]);
}
    ])
});

/// Represents an item stored in the database.
///
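A hedged sketch of how a migration list like this is applied and how the new `items(ts)` index gets used, written against rusqlite and rusqlite_migration directly with a throwaway in-memory database and a trimmed schema (the real migrations also create `tags` and `metas`):

```rust
// Assumes the rusqlite, rusqlite_migration and anyhow crates.
use rusqlite::Connection;
use rusqlite_migration::{Migrations, M};

fn main() -> anyhow::Result<()> {
    let mut conn = Connection::open_in_memory()?;

    // Trimmed migration list for illustration only.
    let migrations = Migrations::new(vec![
        M::up(
            "CREATE TABLE items(
                id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                ts TEXT NOT NULL,
                compression TEXT NOT NULL)",
        ),
        M::up("CREATE INDEX idx_items_ts ON items(ts)"),
    ]);
    migrations.to_latest(&mut conn)?;

    conn.execute(
        "INSERT INTO items(ts, compression) VALUES ('2026-03-21T00:00:00Z', 'raw')",
        [],
    )?;

    // ORDER BY ts can be served by idx_items_ts instead of a full scan plus sort.
    let latest_ts: String = conn.query_row(
        "SELECT ts FROM items ORDER BY ts DESC LIMIT 1",
        [],
        |row| row.get(0),
    )?;
    println!("latest item timestamp: {latest_ts}");
    Ok(())
}
```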
@@ -213,6 +213,44 @@ pub enum FilterType {
/// Prevents OOM on large files by rejecting inputs that exceed this limit.
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;

struct BoundedVecWriter {
    data: Vec<u8>,
    limit: usize,
}

impl BoundedVecWriter {
    fn new(limit: usize) -> Self {
        Self {
            data: Vec::new(),
            limit,
        }
    }

    fn into_inner(self) -> Vec<u8> {
        self.data
    }
}

impl std::io::Write for BoundedVecWriter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        if self.data.len() + buf.len() > self.limit {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!(
                    "Input size exceeds maximum filter buffer size ({} bytes)",
                    MAX_FILTER_BUFFER_SIZE
                ),
            ));
        }
        self.data.write_all(buf)?;
        Ok(buf.len())
    }

    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

/// A chain of filter plugins applied sequentially.
///
/// Chains multiple filters, applying them in order to the input stream.
@@ -360,21 +398,10 @@ impl FilterChain {
        }

        // For multiple plugins, we need to chain them together
        // We'll use a temporary buffer to hold intermediate results
        let mut current_data = Vec::new();
        std::io::copy(reader, &mut current_data)?;

        if current_data.len() > MAX_FILTER_BUFFER_SIZE {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!(
                    "Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \
                    Consider using fewer filter plugins or smaller inputs.",
                    current_data.len(),
                    MAX_FILTER_BUFFER_SIZE
                ),
            ));
        }
        // We'll use a bounded buffer to hold intermediate results
        let mut bounded_writer = BoundedVecWriter::new(MAX_FILTER_BUFFER_SIZE);
        std::io::copy(reader, &mut bounded_writer)?;
        let mut current_data = bounded_writer.into_inner();

        // Store the plugins length to avoid borrowing issues
        let plugins_len = self.plugins.len();
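A hedged, standalone sketch of the bounded-buffer idea introduced above: a `Write` adapter that rejects data past a cap, so `std::io::copy` fails early instead of buffering an arbitrarily large input (simplified, not the project's exact type):

```rust
use std::io::{Cursor, Error, ErrorKind, Result, Write};

struct BoundedBuf {
    data: Vec<u8>,
    limit: usize,
}

impl Write for BoundedBuf {
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        if self.data.len() + buf.len() > self.limit {
            // Refuse to grow past the cap; the caller sees an error immediately.
            return Err(Error::new(ErrorKind::InvalidData, "input exceeds buffer limit"));
        }
        self.data.extend_from_slice(buf);
        Ok(buf.len())
    }

    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}

fn main() {
    let mut small = BoundedBuf { data: Vec::new(), limit: 4 };
    let err = std::io::copy(&mut Cursor::new(b"well over four bytes"), &mut small).unwrap_err();
    assert_eq!(err.kind(), ErrorKind::InvalidData);
    println!("rejected oversized input: {err}");
}
```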
@@ -1,5 +1,4 @@
use log::{debug, warn};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
@@ -444,9 +443,9 @@ where
    ///
    /// An empty `HashMap` (default implementation).
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        use once_cell::sync::Lazy;
        static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
            Lazy::new(std::collections::HashMap::new);
        use std::sync::LazyLock;
        static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
            LazyLock::new(std::collections::HashMap::new);
        &EMPTY
    }

@@ -471,9 +470,9 @@ where
    ///
    /// An empty `HashMap` (default implementation).
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        use once_cell::sync::Lazy;
        static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
            Lazy::new(std::collections::HashMap::new);
        use std::sync::LazyLock;
        static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
            LazyLock::new(std::collections::HashMap::new);
        &EMPTY
    }

@@ -602,8 +601,9 @@ where
}

/// Global registry for meta plugins.
static META_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<MetaPluginType, PluginConstructor>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));
static META_PLUGIN_REGISTRY: std::sync::LazyLock<
    Mutex<HashMap<MetaPluginType, PluginConstructor>>,
> = std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));

/// Register a meta plugin with the global registry.
///
@@ -21,7 +21,6 @@ use chrono::{DateTime, Utc};
use clap::Command;
use clap::error::ErrorKind;
use comfy_table::{Attribute, Cell, ContentArrangement, Table};
use lazy_static::lazy_static;
use log::debug;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -57,9 +56,8 @@ pub enum OutputFormat {
    Yaml,
}

lazy_static! {
    static ref KEEP_META_RE: Regex = Regex::new(r"^KEEP_META_(.+)$").unwrap();
}
static KEEP_META_RE: std::sync::LazyLock<Regex> =
    std::sync::LazyLock::new(|| Regex::new(r"^KEEP_META_(.+)$").unwrap());

pub const IMPORT_FORMAT_ERROR: &str =
    "Unsupported import format: {} (expected .keep.tar or .meta.yml)";
@@ -178,6 +178,7 @@ pub async fn handle_list_items(
    let item_infos: Vec<ItemInfo> = items_with_meta
        .into_iter()
        .filter_map(|iwm| ItemInfo::try_from(iwm).ok())
        .map(|info| info.with_file_size(&state.data_dir))
        .collect();

    ResponseBuilder::json(ApiResponse::ok(item_infos))
@@ -339,6 +340,7 @@ pub async fn handle_post_item(
    let db = state.db.clone();
    let item_service = state.item_service.clone();
    let settings = state.settings.clone();
    let data_dir = state.data_dir.clone();

    // Parse tags from query parameter
    let tags: Vec<String> = params
@@ -472,10 +474,12 @@ pub async fn handle_post_item(
        return Err(StatusCode::PAYLOAD_TOO_LARGE);
    }

    let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| {
        warn!("Item conversion failed: {e}");
        StatusCode::INTERNAL_SERVER_ERROR
    })?;
    let item_info = ItemInfo::try_from(item_with_meta)
        .map(|info| info.with_file_size(&data_dir))
        .map_err(|e| {
            warn!("Item conversion failed: {e}");
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    Ok(Json(ApiResponse::ok(item_info)))
}
@@ -1092,6 +1096,7 @@ pub async fn handle_delete_item(
        compression: deleted_item.compression,
        tags: vec![],
        metadata: HashMap::new(),
        file_size: None,
    };

    Ok(Json(ApiResponse::ok(item_info)))
@@ -1124,6 +1129,7 @@ pub async fn handle_get_item_info(

    let db = state.db.clone();
    let item_service = state.item_service.clone();
    let data_dir = state.data_dir.clone();

    let item_with_meta = task::spawn_blocking(move || {
        let conn = db.blocking_lock();
@@ -1136,10 +1142,12 @@ pub async fn handle_get_item_info(
    })?
    .map_err(handle_item_error)?;

    let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| {
        warn!("Item conversion failed: {e}");
        StatusCode::INTERNAL_SERVER_ERROR
    })?;
    let item_info = ItemInfo::try_from(item_with_meta)
        .map(|info| info.with_file_size(&data_dir))
        .map_err(|e| {
            warn!("Item conversion failed: {e}");
            StatusCode::INTERNAL_SERVER_ERROR
        })?;

    Ok(Json(ApiResponse::ok(item_info)))
}
@@ -1352,6 +1360,7 @@ pub async fn handle_update_item(
    let db = state.db.clone();
    let item_service = state.item_service.clone();
    let settings = state.settings.clone();
    let data_dir = state.data_dir.clone();
    let size_param = params.uncompressed_size;

    let item_info = task::spawn_blocking(move || {
@@ -1369,10 +1378,12 @@ pub async fn handle_update_item(
            return Err(StatusCode::INTERNAL_SERVER_ERROR);
        }
        match item_service.get_item(&conn, item_id) {
            Ok(iwm) => ItemInfo::try_from(iwm).map_err(|e| {
                warn!("Item conversion failed: {e}");
                StatusCode::INTERNAL_SERVER_ERROR
            }),
            Ok(iwm) => ItemInfo::try_from(iwm)
                .map(|info| info.with_file_size(&data_dir))
                .map_err(|e| {
                    warn!("Item conversion failed: {e}");
                    StatusCode::INTERNAL_SERVER_ERROR
                }),
            Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR),
        }
    }
@@ -1392,10 +1403,12 @@ pub async fn handle_update_item(
    );

    match result {
        Ok(item_with_meta) => ItemInfo::try_from(item_with_meta).map_err(|e| {
            warn!("Item conversion failed: {e}");
            StatusCode::INTERNAL_SERVER_ERROR
        }),
        Ok(item_with_meta) => ItemInfo::try_from(item_with_meta)
            .map(|info| info.with_file_size(&data_dir))
            .map_err(|e| {
                warn!("Item conversion failed: {e}");
                StatusCode::INTERNAL_SERVER_ERROR
            }),
        Err(CoreError::ItemNotFound(_)) => Err(StatusCode::NOT_FOUND),
        Err(e) => {
            warn!("Failed to update item {item_id}: {e}");
@@ -366,10 +366,13 @@ pub struct StatusInfoResponse {
/// let item_info = ItemInfo {
///     id: 42,
///     ts: "2023-12-01T15:30:45Z".to_string(),
///     size: Some(1024),
///     uncompressed_size: Some(1024),
///     compressed_size: Some(512),
///     closed: true,
///     compression: "gzip".to_string(),
///     tags: vec!["important".to_string()],
///     metadata: HashMap::from([("mime_type".to_string(), "text/plain".to_string())]),
///     file_size: Some(512),
/// };
/// ```
#[derive(Serialize, Deserialize, ToSchema)]
@@ -413,6 +416,33 @@ pub struct ItemInfo {
    /// Key-value pairs containing additional metadata about the item.
    #[schema(example = json!({"mime_type": "text/plain", "mime_encoding": "utf-8", "line_count": "42"}))]
    pub metadata: HashMap<String, String>,
    /// Actual file size in bytes.
    ///
    /// The filesystem-reported size of the item's data file. This may differ from
    /// `compressed_size` if the file was written and the database hasn't been updated.
    /// None if the file cannot be read (e.g., file not found, permission denied).
    #[schema(example = 512)]
    pub file_size: Option<i64>,
}

impl ItemInfo {
    /// Enriches this `ItemInfo` with the actual filesystem-reported size.
    ///
    /// Reads the size of the item's data file from disk and sets `file_size`.
    /// If the file cannot be read, `file_size` is left as None.
    ///
    /// # Arguments
    ///
    /// * `data_dir` - The data directory path containing item files.
    ///
    /// # Returns
    ///
    /// A new `ItemInfo` with `file_size` populated from the filesystem.
    pub fn with_file_size(mut self, data_dir: &std::path::Path) -> Self {
        let item_path = data_dir.join(self.id.to_string());
        self.file_size = std::fs::metadata(&item_path).map(|m| m.len() as i64).ok();
        self
    }
}

impl TryFrom<ItemWithMeta> for ItemInfo {
@@ -433,6 +463,7 @@ impl TryFrom<ItemWithMeta> for ItemInfo {
            compression: item_with_meta.item.compression,
            tags,
            metadata,
            file_size: None,
        })
    }
}
@@ -13,11 +13,13 @@ use serde::Deserialize;
use std::collections::HashMap;

/// Escape text content for safe HTML insertion.
#[inline]
fn esc(s: &str) -> String {
    encode_text(s).to_string()
}

/// Escape attribute values for safe HTML attribute insertion.
#[inline]
fn esc_attr(s: &str) -> String {
    encode_double_quoted_attribute(s).to_string()
}
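For context, a hedged sketch of how escape helpers like these are used when assembling HTML by hand, calling the html_escape crate directly; the row-building code is illustrative:

```rust
// Assumes the `html_escape` crate.
use html_escape::{encode_double_quoted_attribute, encode_text};

#[inline]
fn esc(s: &str) -> String {
    encode_text(s).to_string()
}

#[inline]
fn esc_attr(s: &str) -> String {
    encode_double_quoted_attribute(s).to_string()
}

fn main() {
    let tag = r#"notes <important> & "quoted""#;
    // Text nodes and attribute values need different escaping rules.
    let row = format!(r#"<td title="{}">{}</td>"#, esc_attr(tag), esc(tag));
    println!("{row}");
}
```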
@@ -1,5 +1,4 @@
use crate::filter_plugin::{FilterChain, parse_filter_string};
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::io::{Read, Result, Write};
use std::sync::Mutex;
@@ -166,8 +165,8 @@ impl FilterService {
/// # Panics
///
/// Lock acquisition failures (rare) cause panics in accessors.
static FILTER_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<String, FilterConstructor>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));
static FILTER_PLUGIN_REGISTRY: std::sync::LazyLock<Mutex<HashMap<String, FilterConstructor>>> =
    std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));

/// Registers a filter plugin in the global registry.
///
@@ -1,3 +1,8 @@
/// Business logic services for the Keep application.
///
/// This module provides the core service layer that orchestrates item storage,
/// compression, metadata collection, and filtering. Services are used by both
/// local CLI modes and the HTTP server.
pub mod compression_service;
pub mod error;
pub mod filter_service;
@@ -1,5 +1,4 @@
use anyhow::{Result, bail};
use once_cell::sync::Lazy;

/// Supported LLM token encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
@@ -48,10 +47,10 @@ impl std::fmt::Debug for Tokenizer {
}

/// Static tokenizer instances — loaded once per process, shared across all plugins.
static CL100K: Lazy<Tokenizer> = Lazy::new(|| {
static CL100K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
    Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer")
});
static O200K: Lazy<Tokenizer> = Lazy::new(|| {
static O200K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
    Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer")
});