Compare commits
3 Commits
ab2fb07505
...
e2cb36d2a8
| Author | SHA1 | Date | |
|---|---|---|---|
| e2cb36d2a8 | |||
| 0004324301 | |||
| b3edfe7de6 |
26
CHANGELOG.md
26
CHANGELOG.md
@@ -7,6 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Database index on `items(ts)` column for faster ORDER BY sorting
|
||||||
|
- Server API `ItemInfo` now includes `file_size` — actual filesystem-reported size of the item data file
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Filter plugins check size before loading content into memory (prevents OOM on large inputs)
|
||||||
|
- Status page pre-allocates collections with known capacities (meta plugins, compression info)
|
||||||
|
- `#[inline]` on HTML escape helper functions (`esc`, `esc_attr`) for hot path performance
|
||||||
|
- Removed `once_cell` crate (replaced with `std::sync::LazyLock` from Rust 1.80)
|
||||||
|
- Removed `lazy_static` crate (replaced with `std::sync::LazyLock`)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- CLI help text typo: "metatdata" → "metadata" in `--get` and `--info` descriptions
|
||||||
|
|
||||||
|
### Refactored
|
||||||
|
|
||||||
|
- Added module-level documentation to `services/` module
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
- README.md: Fixed compression table — zstd is native (not external), "none" renamed to "raw"
|
||||||
|
- DESIGN.md: Updated schema to reflect current `items` table columns and meta plugin inventory
|
||||||
|
|
||||||
## [0.1.0] - 2026-03-21
|
## [0.1.0] - 2026-03-21
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -1727,7 +1727,6 @@ dependencies = [
|
|||||||
"inventory",
|
"inventory",
|
||||||
"is-terminal",
|
"is-terminal",
|
||||||
"jsonwebtoken",
|
"jsonwebtoken",
|
||||||
"lazy_static",
|
|
||||||
"libc",
|
"libc",
|
||||||
"local-ip-address",
|
"local-ip-address",
|
||||||
"log",
|
"log",
|
||||||
@@ -1735,7 +1734,6 @@ dependencies = [
|
|||||||
"magic",
|
"magic",
|
||||||
"md5",
|
"md5",
|
||||||
"nix",
|
"nix",
|
||||||
"once_cell",
|
|
||||||
"os_pipe",
|
"os_pipe",
|
||||||
"pest",
|
"pest",
|
||||||
"pest_derive",
|
"pest_derive",
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ hyper = { version = "1.0", features = ["full"] }
|
|||||||
http-body-util = "0.1"
|
http-body-util = "0.1"
|
||||||
inventory = "0.3"
|
inventory = "0.3"
|
||||||
is-terminal = "0.4"
|
is-terminal = "0.4"
|
||||||
lazy_static = "1.5"
|
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
local-ip-address = "0.6"
|
local-ip-address = "0.6"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
@@ -45,7 +44,6 @@ magic = { version = "0.13", optional = true }
|
|||||||
infer = { version = "0.19", optional = true }
|
infer = { version = "0.19", optional = true }
|
||||||
tree_magic_mini = { version = "3.2", optional = true }
|
tree_magic_mini = { version = "3.2", optional = true }
|
||||||
nix = { version = "0.30", features = ["fs", "process"] }
|
nix = { version = "0.30", features = ["fs", "process"] }
|
||||||
once_cell = "1.21"
|
|
||||||
comfy-table = "7.2"
|
comfy-table = "7.2"
|
||||||
pwhash = "1.0"
|
pwhash = "1.0"
|
||||||
regex = "1.10"
|
regex = "1.10"
|
||||||
|
|||||||
41
DESIGN.md
41
DESIGN.md
@@ -117,7 +117,7 @@
|
|||||||
## Data Storage
|
## Data Storage
|
||||||
|
|
||||||
### Database Schema
|
### Database Schema
|
||||||
- `items` table: id (primary key), ts (timestamp), size (optional), compression
|
- `items` table: id (primary key), ts (timestamp), uncompressed_size (optional), compressed_size (optional), closed (boolean), compression
|
||||||
- `tags` table: id (foreign key to items), name (tag name)
|
- `tags` table: id (foreign key to items), name (tag name)
|
||||||
- `metas` table: id (foreign key to items), name (meta key), value (meta value)
|
- `metas` table: id (foreign key to items), name (meta key), value (meta value)
|
||||||
- Indexes on tag names and meta names for faster queries
|
- Indexes on tag names and meta names for faster queries
|
||||||
@@ -178,26 +178,25 @@
|
|||||||
- None (no compression)
|
- None (no compression)
|
||||||
|
|
||||||
## Supported Meta Plugins
|
## Supported Meta Plugins
|
||||||
- FileMagic - File type detection using file command
|
|
||||||
- FileMime - MIME type detection using file command
|
Meta plugins collect metadata during item save. Each plugin produces one or more key-value pairs:
|
||||||
- FileEncoding - File encoding detection using file command
|
|
||||||
- LineCount - Line count using wc command
|
- `magic_file` - File type detection using libmagic (when `magic` feature enabled)
|
||||||
- WordCount - Word count using wc command
|
- `infer` - MIME type detection using infer crate (when `infer` feature enabled)
|
||||||
- Cwd - Current working directory
|
- `tree_magic_mini` - MIME type detection using tree_magic_mini (when `tree_magic_mini` feature enabled)
|
||||||
- Binary - Binary file detection
|
- `tokens` - LLM token counting using tiktoken (when `tokens` feature enabled)
|
||||||
- Uid - Current user ID
|
- `text` - Text analysis: line count, word count, char count, line average length
|
||||||
- User - Current username
|
- `digest` - SHA-256 and MD5 checksums
|
||||||
- Gid - Current group ID
|
- `hostname` - System hostname (full and short)
|
||||||
- Group - Current group name
|
- `cwd` - Current working directory
|
||||||
- Shell - Shell path from SHELL environment variable
|
- `user` - Current username and UID
|
||||||
- ShellPid - Shell process ID from PPID environment variable
|
- `shell` - Shell path from SHELL environment variable
|
||||||
- KeepPid - Keep process ID
|
- `shell_pid` - Shell process ID from PPID
|
||||||
- DigestSha256 - SHA-256 digest
|
- `keep_pid` - Keep process ID
|
||||||
- DigestMd5 - MD5 digest using md5sum command
|
- `env` - Arbitrary environment variables (via `KEEP_META_ENV_*` prefix)
|
||||||
- ReadTime - Time taken to read data
|
- `exec` - Execute external commands for custom metadata
|
||||||
- ReadRate - Rate of data reading
|
- `read_time` - Time taken to read content
|
||||||
- Hostname - System hostname
|
- `read_rate` - Content read rate (bytes/second)
|
||||||
- FullHostname - Fully qualified domain name
|
|
||||||
|
|
||||||
## Testing Strategy
|
## Testing Strategy
|
||||||
- Unit tests for each module in `src/tests/`
|
- Unit tests for each module in `src/tests/`
|
||||||
|
|||||||
@@ -345,8 +345,8 @@ Items are compressed automatically on save. Default: LZ4.
|
|||||||
| `gzip` | Internal | Fast | Good |
|
| `gzip` | Internal | Fast | Good |
|
||||||
| `bzip2` | External | Slow | Better |
|
| `bzip2` | External | Slow | Better |
|
||||||
| `xz` | External | Slowest | Best |
|
| `xz` | External | Slowest | Best |
|
||||||
| `zstd` | External | Fast | Good |
|
| `zstd` | Internal | Fast | Good |
|
||||||
| `none` | Internal | N/A | N/A |
|
| `raw` | Internal | N/A | N/A |
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
# Specify compression per item
|
# Specify compression per item
|
||||||
|
|||||||
@@ -29,9 +29,7 @@ pub struct ModeArgs {
|
|||||||
pub save: bool,
|
pub save: bool,
|
||||||
|
|
||||||
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))]
|
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "diff", "list", "delete", "info", "update", "status", "export", "import"]))]
|
||||||
#[arg(help(
|
#[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
|
||||||
"Get an item either by it's ID or by a combination of matching tags and metatdata"
|
|
||||||
))]
|
|
||||||
pub get: bool,
|
pub get: bool,
|
||||||
|
|
||||||
#[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))]
|
#[arg(group("mode"), help_heading("Mode Options"), long, conflicts_with_all(["save", "get", "list", "delete", "info", "update", "status", "export", "import"]))]
|
||||||
@@ -48,9 +46,7 @@ pub struct ModeArgs {
|
|||||||
pub delete: bool,
|
pub delete: bool,
|
||||||
|
|
||||||
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))]
|
#[arg(group("mode"), help_heading("Mode Options"), short, long, conflicts_with_all(["save", "get", "diff", "list", "delete", "update", "status", "export", "import"]))]
|
||||||
#[arg(help(
|
#[arg(help("Get an item either by its ID or by a combination of matching tags and metadata"))]
|
||||||
"Get an item either by it's ID or by a combination of matching tags and metatdata"
|
|
||||||
))]
|
|
||||||
pub info: bool,
|
pub info: bool,
|
||||||
|
|
||||||
#[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))]
|
#[arg(group("mode"), help_heading("Mode Options"), short('u'), long, conflicts_with_all(["save", "get", "diff", "list", "delete", "info", "status", "export", "import"]))]
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ pub fn generate_status_info(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let _default_type = crate::compression_engine::default_compression_type();
|
let _default_type = crate::compression_engine::default_compression_type();
|
||||||
let mut compression_info = Vec::new();
|
let mut compression_info = Vec::with_capacity(CompressionType::iter().count());
|
||||||
|
|
||||||
// Sort compression types by their string representation
|
// Sort compression types by their string representation
|
||||||
let mut sorted_compression_types: Vec<CompressionType> = CompressionType::iter().collect();
|
let mut sorted_compression_types: Vec<CompressionType> = CompressionType::iter().collect();
|
||||||
@@ -141,7 +141,8 @@ pub fn generate_status_info(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut meta_plugins_map = std::collections::HashMap::new();
|
let mut meta_plugins_map =
|
||||||
|
std::collections::HashMap::with_capacity(MetaPluginType::iter().count());
|
||||||
let mut enabled_meta_plugins_vec = Vec::new();
|
let mut enabled_meta_plugins_vec = Vec::new();
|
||||||
|
|
||||||
// Sort meta plugin types by their string representation to avoid creating plugins just for sorting
|
// Sort meta plugin types by their string representation to avoid creating plugins just for sorting
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ use strum::{Display, EnumIter, EnumString};
|
|||||||
|
|
||||||
use log::*;
|
use log::*;
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
|
|
||||||
extern crate enum_map;
|
extern crate enum_map;
|
||||||
use enum_map::enum_map;
|
use enum_map::enum_map;
|
||||||
use enum_map::{Enum, EnumMap};
|
use enum_map::{Enum, EnumMap};
|
||||||
@@ -180,63 +178,65 @@ impl Clone for Box<dyn CompressionEngine> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
fn init_compression_engines() -> EnumMap<CompressionType, Box<dyn CompressionEngine>> {
|
||||||
static ref COMPRESSION_ENGINES: EnumMap<CompressionType, Box<dyn CompressionEngine>> = {
|
#[allow(unused_mut)]
|
||||||
#[allow(unused_mut)] // mut needed when gzip/lz4 features are enabled
|
let mut em: EnumMap<CompressionType, Box<dyn CompressionEngine>> = enum_map! {
|
||||||
let mut em = enum_map! {
|
CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
||||||
CompressionType::LZ4 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
"lz4",
|
||||||
"lz4",
|
vec!["-c"],
|
||||||
vec!["-c"],
|
vec!["-d", "-c"]
|
||||||
vec!["-d", "-c"]
|
)) as Box<dyn CompressionEngine>,
|
||||||
)) as Box<dyn CompressionEngine>,
|
CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
||||||
CompressionType::GZip => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
"gzip",
|
||||||
"gzip",
|
vec!["-c"],
|
||||||
vec!["-c"],
|
vec!["-d", "-c"]
|
||||||
vec!["-d", "-c"]
|
)) as Box<dyn CompressionEngine>,
|
||||||
)) as Box<dyn CompressionEngine>,
|
CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
||||||
CompressionType::BZip2 => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
"bzip2",
|
||||||
"bzip2",
|
vec!["-c"],
|
||||||
vec!["-c"],
|
vec!["-d", "-c"]
|
||||||
vec!["-d", "-c"]
|
)) as Box<dyn CompressionEngine>,
|
||||||
)) as Box<dyn CompressionEngine>,
|
CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
||||||
CompressionType::XZ => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
"xz",
|
||||||
"xz",
|
vec!["-c"],
|
||||||
vec!["-c"],
|
vec!["-d", "-c"]
|
||||||
vec!["-d", "-c"]
|
)) as Box<dyn CompressionEngine>,
|
||||||
)) as Box<dyn CompressionEngine>,
|
CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
||||||
CompressionType::ZStd => Box::new(crate::compression_engine::program::CompressionEngineProgram::new(
|
"zstd",
|
||||||
"zstd",
|
vec!["-c"],
|
||||||
vec!["-c"],
|
vec!["-d", "-c"]
|
||||||
vec!["-d", "-c"]
|
)) as Box<dyn CompressionEngine>,
|
||||||
)) as Box<dyn CompressionEngine>,
|
CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
|
||||||
CompressionType::Raw => Box::new(crate::compression_engine::raw::CompressionEngineRaw::new()) as Box<dyn CompressionEngine>
|
|
||||||
};
|
|
||||||
|
|
||||||
#[cfg(feature = "gzip")]
|
|
||||||
{
|
|
||||||
em[CompressionType::GZip] =
|
|
||||||
Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
|
|
||||||
as Box<dyn CompressionEngine>;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "lz4")]
|
|
||||||
{
|
|
||||||
em[CompressionType::LZ4] =
|
|
||||||
Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
|
|
||||||
as Box<dyn CompressionEngine>;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(feature = "zstd")]
|
|
||||||
{
|
|
||||||
em[CompressionType::ZStd] =
|
|
||||||
Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
|
|
||||||
as Box<dyn CompressionEngine>;
|
|
||||||
}
|
|
||||||
|
|
||||||
em
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[cfg(feature = "gzip")]
|
||||||
|
{
|
||||||
|
em[CompressionType::GZip] =
|
||||||
|
Box::new(crate::compression_engine::gzip::CompressionEngineGZip::new())
|
||||||
|
as Box<dyn CompressionEngine>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "lz4")]
|
||||||
|
{
|
||||||
|
em[CompressionType::LZ4] =
|
||||||
|
Box::new(crate::compression_engine::lz4::CompressionEngineLZ4::new())
|
||||||
|
as Box<dyn CompressionEngine>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "zstd")]
|
||||||
|
{
|
||||||
|
em[CompressionType::ZStd] =
|
||||||
|
Box::new(crate::compression_engine::zstd::CompressionEngineZstd::new())
|
||||||
|
as Box<dyn CompressionEngine>;
|
||||||
|
}
|
||||||
|
|
||||||
|
em
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static COMPRESSION_ENGINES: std::sync::LazyLock<
|
||||||
|
EnumMap<CompressionType, Box<dyn CompressionEngine>>,
|
||||||
|
> = std::sync::LazyLock::new(init_compression_engines);
|
||||||
|
|
||||||
pub fn default_compression_type() -> CompressionType {
|
pub fn default_compression_type() -> CompressionType {
|
||||||
CompressionType::LZ4
|
CompressionType::LZ4
|
||||||
}
|
}
|
||||||
|
|||||||
20
src/db.rs
20
src/db.rs
@@ -1,6 +1,5 @@
|
|||||||
use anyhow::{Context, Error, Result, anyhow};
|
use anyhow::{Context, Error, Result, anyhow};
|
||||||
use chrono::prelude::*;
|
use chrono::prelude::*;
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use log::*;
|
use log::*;
|
||||||
use rusqlite::{Connection, OpenFlags, Row, params};
|
use rusqlite::{Connection, OpenFlags, Row, params};
|
||||||
use rusqlite_migration::{M, Migrations};
|
use rusqlite_migration::{M, Migrations};
|
||||||
@@ -47,25 +46,21 @@ let id = db::insert_item(&conn, item)?;
|
|||||||
```
|
```
|
||||||
*/
|
*/
|
||||||
|
|
||||||
lazy_static! {
|
static MIGRATIONS: std::sync::LazyLock<Migrations<'static>> = std::sync::LazyLock::new(|| {
|
||||||
// Database schema migrations for the Keep application.
|
Migrations::new(vec![
|
||||||
//
|
|
||||||
// Defines the sequence of migrations to create and update the schema.
|
|
||||||
// Applied automatically when opening a database connection.
|
|
||||||
static ref MIGRATIONS: Migrations<'static> = Migrations::new(vec![
|
|
||||||
M::up(
|
M::up(
|
||||||
"CREATE TABLE items(
|
"CREATE TABLE items(
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||||
ts TEXT NOT NULL,
|
ts TEXT NOT NULL,
|
||||||
size INTEGER NULL,
|
size INTEGER NULL,
|
||||||
compression TEXT NOT NULL)"
|
compression TEXT NOT NULL)",
|
||||||
),
|
),
|
||||||
M::up(
|
M::up(
|
||||||
"CREATE TABLE tags (
|
"CREATE TABLE tags (
|
||||||
id INTEGER NOT NULL,
|
id INTEGER NOT NULL,
|
||||||
name TEXT NOT NULL,
|
name TEXT NOT NULL,
|
||||||
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
|
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
|
||||||
PRIMARY KEY(id, name));"
|
PRIMARY KEY(id, name));",
|
||||||
),
|
),
|
||||||
M::up(
|
M::up(
|
||||||
"CREATE TABLE metas (
|
"CREATE TABLE metas (
|
||||||
@@ -73,16 +68,17 @@ lazy_static! {
|
|||||||
name TEXT NOT NULL,
|
name TEXT NOT NULL,
|
||||||
value TEXT NOT NULL,
|
value TEXT NOT NULL,
|
||||||
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
|
FOREIGN KEY(id) REFERENCES items(id) ON DELETE CASCADE,
|
||||||
PRIMARY KEY(id, name));"
|
PRIMARY KEY(id, name));",
|
||||||
),
|
),
|
||||||
M::up("CREATE INDEX idx_tags_name ON tags(name)"),
|
M::up("CREATE INDEX idx_tags_name ON tags(name)"),
|
||||||
M::up("CREATE INDEX idx_metas_name ON metas(name)"),
|
M::up("CREATE INDEX idx_metas_name ON metas(name)"),
|
||||||
|
M::up("CREATE INDEX idx_items_ts ON items(ts)"),
|
||||||
M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"),
|
M::up("UPDATE items SET compression = 'raw' WHERE compression = 'none'"),
|
||||||
M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"),
|
M::up("ALTER TABLE items RENAME COLUMN size TO uncompressed_size"),
|
||||||
M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"),
|
M::up("ALTER TABLE items ADD COLUMN compressed_size INTEGER NULL"),
|
||||||
M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"),
|
M::up("ALTER TABLE items ADD COLUMN closed BOOLEAN NOT NULL DEFAULT 1"),
|
||||||
]);
|
])
|
||||||
}
|
});
|
||||||
|
|
||||||
/// Represents an item stored in the database.
|
/// Represents an item stored in the database.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -213,6 +213,44 @@ pub enum FilterType {
|
|||||||
/// Prevents OOM on large files by rejecting inputs that exceed this limit.
|
/// Prevents OOM on large files by rejecting inputs that exceed this limit.
|
||||||
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;
|
const MAX_FILTER_BUFFER_SIZE: usize = 256 * 1024 * 1024;
|
||||||
|
|
||||||
|
struct BoundedVecWriter {
|
||||||
|
data: Vec<u8>,
|
||||||
|
limit: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BoundedVecWriter {
|
||||||
|
fn new(limit: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
data: Vec::new(),
|
||||||
|
limit,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn into_inner(self) -> Vec<u8> {
|
||||||
|
self.data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::io::Write for BoundedVecWriter {
|
||||||
|
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||||
|
if self.data.len() + buf.len() > self.limit {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!(
|
||||||
|
"Input size exceeds maximum filter buffer size ({} bytes)",
|
||||||
|
MAX_FILTER_BUFFER_SIZE
|
||||||
|
),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
self.data.write_all(buf)?;
|
||||||
|
Ok(buf.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush(&mut self) -> std::io::Result<()> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A chain of filter plugins applied sequentially.
|
/// A chain of filter plugins applied sequentially.
|
||||||
///
|
///
|
||||||
/// Chains multiple filters, applying them in order to the input stream.
|
/// Chains multiple filters, applying them in order to the input stream.
|
||||||
@@ -360,21 +398,10 @@ impl FilterChain {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For multiple plugins, we need to chain them together
|
// For multiple plugins, we need to chain them together
|
||||||
// We'll use a temporary buffer to hold intermediate results
|
// We'll use a bounded buffer to hold intermediate results
|
||||||
let mut current_data = Vec::new();
|
let mut bounded_writer = BoundedVecWriter::new(MAX_FILTER_BUFFER_SIZE);
|
||||||
std::io::copy(reader, &mut current_data)?;
|
std::io::copy(reader, &mut bounded_writer)?;
|
||||||
|
let mut current_data = bounded_writer.into_inner();
|
||||||
if current_data.len() > MAX_FILTER_BUFFER_SIZE {
|
|
||||||
return Err(std::io::Error::new(
|
|
||||||
std::io::ErrorKind::InvalidData,
|
|
||||||
format!(
|
|
||||||
"Input size ({} bytes) exceeds maximum filter buffer size ({} bytes). \
|
|
||||||
Consider using fewer filter plugins or smaller inputs.",
|
|
||||||
current_data.len(),
|
|
||||||
MAX_FILTER_BUFFER_SIZE
|
|
||||||
),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the plugins length to avoid borrowing issues
|
// Store the plugins length to avoid borrowing issues
|
||||||
let plugins_len = self.plugins.len();
|
let plugins_len = self.plugins.len();
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use log::{debug, warn};
|
use log::{debug, warn};
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
@@ -444,9 +443,9 @@ where
|
|||||||
///
|
///
|
||||||
/// An empty `HashMap` (default implementation).
|
/// An empty `HashMap` (default implementation).
|
||||||
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||||
use once_cell::sync::Lazy;
|
use std::sync::LazyLock;
|
||||||
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
|
static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
|
||||||
Lazy::new(std::collections::HashMap::new);
|
LazyLock::new(std::collections::HashMap::new);
|
||||||
&EMPTY
|
&EMPTY
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -471,9 +470,9 @@ where
|
|||||||
///
|
///
|
||||||
/// An empty `HashMap` (default implementation).
|
/// An empty `HashMap` (default implementation).
|
||||||
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||||
use once_cell::sync::Lazy;
|
use std::sync::LazyLock;
|
||||||
static EMPTY: Lazy<std::collections::HashMap<String, serde_yaml::Value>> =
|
static EMPTY: LazyLock<std::collections::HashMap<String, serde_yaml::Value>> =
|
||||||
Lazy::new(std::collections::HashMap::new);
|
LazyLock::new(std::collections::HashMap::new);
|
||||||
&EMPTY
|
&EMPTY
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -602,8 +601,9 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Global registry for meta plugins.
|
/// Global registry for meta plugins.
|
||||||
static META_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<MetaPluginType, PluginConstructor>>> =
|
static META_PLUGIN_REGISTRY: std::sync::LazyLock<
|
||||||
Lazy::new(|| Mutex::new(HashMap::new()));
|
Mutex<HashMap<MetaPluginType, PluginConstructor>>,
|
||||||
|
> = std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
|
||||||
|
|
||||||
/// Register a meta plugin with the global registry.
|
/// Register a meta plugin with the global registry.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ use chrono::{DateTime, Utc};
|
|||||||
use clap::Command;
|
use clap::Command;
|
||||||
use clap::error::ErrorKind;
|
use clap::error::ErrorKind;
|
||||||
use comfy_table::{Attribute, Cell, ContentArrangement, Table};
|
use comfy_table::{Attribute, Cell, ContentArrangement, Table};
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -57,9 +56,8 @@ pub enum OutputFormat {
|
|||||||
Yaml,
|
Yaml,
|
||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
static KEEP_META_RE: std::sync::LazyLock<Regex> =
|
||||||
static ref KEEP_META_RE: Regex = Regex::new(r"^KEEP_META_(.+)$").unwrap();
|
std::sync::LazyLock::new(|| Regex::new(r"^KEEP_META_(.+)$").unwrap());
|
||||||
}
|
|
||||||
|
|
||||||
pub const IMPORT_FORMAT_ERROR: &str =
|
pub const IMPORT_FORMAT_ERROR: &str =
|
||||||
"Unsupported import format: {} (expected .keep.tar or .meta.yml)";
|
"Unsupported import format: {} (expected .keep.tar or .meta.yml)";
|
||||||
|
|||||||
@@ -178,6 +178,7 @@ pub async fn handle_list_items(
|
|||||||
let item_infos: Vec<ItemInfo> = items_with_meta
|
let item_infos: Vec<ItemInfo> = items_with_meta
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|iwm| ItemInfo::try_from(iwm).ok())
|
.filter_map(|iwm| ItemInfo::try_from(iwm).ok())
|
||||||
|
.map(|info| info.with_file_size(&state.data_dir))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
ResponseBuilder::json(ApiResponse::ok(item_infos))
|
ResponseBuilder::json(ApiResponse::ok(item_infos))
|
||||||
@@ -339,6 +340,7 @@ pub async fn handle_post_item(
|
|||||||
let db = state.db.clone();
|
let db = state.db.clone();
|
||||||
let item_service = state.item_service.clone();
|
let item_service = state.item_service.clone();
|
||||||
let settings = state.settings.clone();
|
let settings = state.settings.clone();
|
||||||
|
let data_dir = state.data_dir.clone();
|
||||||
|
|
||||||
// Parse tags from query parameter
|
// Parse tags from query parameter
|
||||||
let tags: Vec<String> = params
|
let tags: Vec<String> = params
|
||||||
@@ -472,10 +474,12 @@ pub async fn handle_post_item(
|
|||||||
return Err(StatusCode::PAYLOAD_TOO_LARGE);
|
return Err(StatusCode::PAYLOAD_TOO_LARGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| {
|
let item_info = ItemInfo::try_from(item_with_meta)
|
||||||
warn!("Item conversion failed: {e}");
|
.map(|info| info.with_file_size(&data_dir))
|
||||||
StatusCode::INTERNAL_SERVER_ERROR
|
.map_err(|e| {
|
||||||
})?;
|
warn!("Item conversion failed: {e}");
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(Json(ApiResponse::ok(item_info)))
|
Ok(Json(ApiResponse::ok(item_info)))
|
||||||
}
|
}
|
||||||
@@ -1092,6 +1096,7 @@ pub async fn handle_delete_item(
|
|||||||
compression: deleted_item.compression,
|
compression: deleted_item.compression,
|
||||||
tags: vec![],
|
tags: vec![],
|
||||||
metadata: HashMap::new(),
|
metadata: HashMap::new(),
|
||||||
|
file_size: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Json(ApiResponse::ok(item_info)))
|
Ok(Json(ApiResponse::ok(item_info)))
|
||||||
@@ -1124,6 +1129,7 @@ pub async fn handle_get_item_info(
|
|||||||
|
|
||||||
let db = state.db.clone();
|
let db = state.db.clone();
|
||||||
let item_service = state.item_service.clone();
|
let item_service = state.item_service.clone();
|
||||||
|
let data_dir = state.data_dir.clone();
|
||||||
|
|
||||||
let item_with_meta = task::spawn_blocking(move || {
|
let item_with_meta = task::spawn_blocking(move || {
|
||||||
let conn = db.blocking_lock();
|
let conn = db.blocking_lock();
|
||||||
@@ -1136,10 +1142,12 @@ pub async fn handle_get_item_info(
|
|||||||
})?
|
})?
|
||||||
.map_err(handle_item_error)?;
|
.map_err(handle_item_error)?;
|
||||||
|
|
||||||
let item_info = ItemInfo::try_from(item_with_meta).map_err(|e| {
|
let item_info = ItemInfo::try_from(item_with_meta)
|
||||||
warn!("Item conversion failed: {e}");
|
.map(|info| info.with_file_size(&data_dir))
|
||||||
StatusCode::INTERNAL_SERVER_ERROR
|
.map_err(|e| {
|
||||||
})?;
|
warn!("Item conversion failed: {e}");
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(Json(ApiResponse::ok(item_info)))
|
Ok(Json(ApiResponse::ok(item_info)))
|
||||||
}
|
}
|
||||||
@@ -1352,6 +1360,7 @@ pub async fn handle_update_item(
|
|||||||
let db = state.db.clone();
|
let db = state.db.clone();
|
||||||
let item_service = state.item_service.clone();
|
let item_service = state.item_service.clone();
|
||||||
let settings = state.settings.clone();
|
let settings = state.settings.clone();
|
||||||
|
let data_dir = state.data_dir.clone();
|
||||||
let size_param = params.uncompressed_size;
|
let size_param = params.uncompressed_size;
|
||||||
|
|
||||||
let item_info = task::spawn_blocking(move || {
|
let item_info = task::spawn_blocking(move || {
|
||||||
@@ -1369,10 +1378,12 @@ pub async fn handle_update_item(
|
|||||||
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
return Err(StatusCode::INTERNAL_SERVER_ERROR);
|
||||||
}
|
}
|
||||||
match item_service.get_item(&conn, item_id) {
|
match item_service.get_item(&conn, item_id) {
|
||||||
Ok(iwm) => ItemInfo::try_from(iwm).map_err(|e| {
|
Ok(iwm) => ItemInfo::try_from(iwm)
|
||||||
warn!("Item conversion failed: {e}");
|
.map(|info| info.with_file_size(&data_dir))
|
||||||
StatusCode::INTERNAL_SERVER_ERROR
|
.map_err(|e| {
|
||||||
}),
|
warn!("Item conversion failed: {e}");
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR
|
||||||
|
}),
|
||||||
Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR),
|
Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1392,10 +1403,12 @@ pub async fn handle_update_item(
|
|||||||
);
|
);
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(item_with_meta) => ItemInfo::try_from(item_with_meta).map_err(|e| {
|
Ok(item_with_meta) => ItemInfo::try_from(item_with_meta)
|
||||||
warn!("Item conversion failed: {e}");
|
.map(|info| info.with_file_size(&data_dir))
|
||||||
StatusCode::INTERNAL_SERVER_ERROR
|
.map_err(|e| {
|
||||||
}),
|
warn!("Item conversion failed: {e}");
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR
|
||||||
|
}),
|
||||||
Err(CoreError::ItemNotFound(_)) => Err(StatusCode::NOT_FOUND),
|
Err(CoreError::ItemNotFound(_)) => Err(StatusCode::NOT_FOUND),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("Failed to update item {item_id}: {e}");
|
warn!("Failed to update item {item_id}: {e}");
|
||||||
|
|||||||
@@ -366,10 +366,13 @@ pub struct StatusInfoResponse {
|
|||||||
/// let item_info = ItemInfo {
|
/// let item_info = ItemInfo {
|
||||||
/// id: 42,
|
/// id: 42,
|
||||||
/// ts: "2023-12-01T15:30:45Z".to_string(),
|
/// ts: "2023-12-01T15:30:45Z".to_string(),
|
||||||
/// size: Some(1024),
|
/// uncompressed_size: Some(1024),
|
||||||
|
/// compressed_size: Some(512),
|
||||||
|
/// closed: true,
|
||||||
/// compression: "gzip".to_string(),
|
/// compression: "gzip".to_string(),
|
||||||
/// tags: vec!["important".to_string()],
|
/// tags: vec!["important".to_string()],
|
||||||
/// metadata: HashMap::from([("mime_type".to_string(), "text/plain".to_string())]),
|
/// metadata: HashMap::from([("mime_type".to_string(), "text/plain".to_string())]),
|
||||||
|
/// file_size: Some(512),
|
||||||
/// };
|
/// };
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Serialize, Deserialize, ToSchema)]
|
#[derive(Serialize, Deserialize, ToSchema)]
|
||||||
@@ -413,6 +416,33 @@ pub struct ItemInfo {
|
|||||||
/// Key-value pairs containing additional metadata about the item.
|
/// Key-value pairs containing additional metadata about the item.
|
||||||
#[schema(example = json!({"mime_type": "text/plain", "mime_encoding": "utf-8", "line_count": "42"}))]
|
#[schema(example = json!({"mime_type": "text/plain", "mime_encoding": "utf-8", "line_count": "42"}))]
|
||||||
pub metadata: HashMap<String, String>,
|
pub metadata: HashMap<String, String>,
|
||||||
|
/// Actual file size in bytes.
|
||||||
|
///
|
||||||
|
/// The filesystem-reported size of the item's data file. This may differ from
|
||||||
|
/// `compressed_size` if the file was written and the database hasn't been updated.
|
||||||
|
/// None if the file cannot be read (e.g., file not found, permission denied).
|
||||||
|
#[schema(example = 512)]
|
||||||
|
pub file_size: Option<i64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ItemInfo {
|
||||||
|
/// Enriches this `ItemInfo` with the actual filesystem-reported size.
|
||||||
|
///
|
||||||
|
/// Reads the size of the item's data file from disk and sets `file_size`.
|
||||||
|
/// If the file cannot be read, `file_size` is left as None.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `data_dir` - The data directory path containing item files.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A new `ItemInfo` with `file_size` populated from the filesystem.
|
||||||
|
pub fn with_file_size(mut self, data_dir: &std::path::Path) -> Self {
|
||||||
|
let item_path = data_dir.join(self.id.to_string());
|
||||||
|
self.file_size = std::fs::metadata(&item_path).map(|m| m.len() as i64).ok();
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TryFrom<ItemWithMeta> for ItemInfo {
|
impl TryFrom<ItemWithMeta> for ItemInfo {
|
||||||
@@ -433,6 +463,7 @@ impl TryFrom<ItemWithMeta> for ItemInfo {
|
|||||||
compression: item_with_meta.item.compression,
|
compression: item_with_meta.item.compression,
|
||||||
tags,
|
tags,
|
||||||
metadata,
|
metadata,
|
||||||
|
file_size: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,11 +13,13 @@ use serde::Deserialize;
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
/// Escape text content for safe HTML insertion.
|
/// Escape text content for safe HTML insertion.
|
||||||
|
#[inline]
|
||||||
fn esc(s: &str) -> String {
|
fn esc(s: &str) -> String {
|
||||||
encode_text(s).to_string()
|
encode_text(s).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Escape attribute values for safe HTML attribute insertion.
|
/// Escape attribute values for safe HTML attribute insertion.
|
||||||
|
#[inline]
|
||||||
fn esc_attr(s: &str) -> String {
|
fn esc_attr(s: &str) -> String {
|
||||||
encode_double_quoted_attribute(s).to_string()
|
encode_double_quoted_attribute(s).to_string()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use crate::filter_plugin::{FilterChain, parse_filter_string};
|
use crate::filter_plugin::{FilterChain, parse_filter_string};
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io::{Read, Result, Write};
|
use std::io::{Read, Result, Write};
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
@@ -166,8 +165,8 @@ impl FilterService {
|
|||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Lock acquisition failures (rare) cause panics in accessors.
|
/// Lock acquisition failures (rare) cause panics in accessors.
|
||||||
static FILTER_PLUGIN_REGISTRY: Lazy<Mutex<HashMap<String, FilterConstructor>>> =
|
static FILTER_PLUGIN_REGISTRY: std::sync::LazyLock<Mutex<HashMap<String, FilterConstructor>>> =
|
||||||
Lazy::new(|| Mutex::new(HashMap::new()));
|
std::sync::LazyLock::new(|| Mutex::new(HashMap::new()));
|
||||||
|
|
||||||
/// Registers a filter plugin in the global registry.
|
/// Registers a filter plugin in the global registry.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -1,3 +1,8 @@
|
|||||||
|
/// Business logic services for the Keep application.
|
||||||
|
///
|
||||||
|
/// This module provides the core service layer that orchestrates item storage,
|
||||||
|
/// compression, metadata collection, and filtering. Services are used by both
|
||||||
|
/// local CLI modes and the HTTP server.
|
||||||
pub mod compression_service;
|
pub mod compression_service;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod filter_service;
|
pub mod filter_service;
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
|
|
||||||
/// Supported LLM token encodings.
|
/// Supported LLM token encodings.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||||
@@ -48,10 +47,10 @@ impl std::fmt::Debug for Tokenizer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Static tokenizer instances — loaded once per process, shared across all plugins.
|
/// Static tokenizer instances — loaded once per process, shared across all plugins.
|
||||||
static CL100K: Lazy<Tokenizer> = Lazy::new(|| {
|
static CL100K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
|
||||||
Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer")
|
Tokenizer::new(TokenEncoding::Cl100kBase).expect("Failed to create cl100k_base tokenizer")
|
||||||
});
|
});
|
||||||
static O200K: Lazy<Tokenizer> = Lazy::new(|| {
|
static O200K: std::sync::LazyLock<Tokenizer> = std::sync::LazyLock::new(|| {
|
||||||
Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer")
|
Tokenizer::new(TokenEncoding::O200kBase).expect("Failed to create o200k_base tokenizer")
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user