Files
keep/src/lib.rs
Andrew Phillips 914190e119 feat: add LLM token counting meta plugin and token filters
Add tiktoken-based token counting via new 'tokens' feature flag.

New components:
- Shared tokenizer module wrapping tiktoken CoreBPE (cl100k_base, o200k_base)
- TokensMetaPlugin: streaming token counter, tokenizes each chunk independently
- head_tokens(N): stream first N tokens, split at exact boundary when mid-chunk
- skip_tokens(N): skip first N tokens, stream the rest
- tail_tokens(N): bounded ring buffer (~16KB), outputs last N tokens at finalize

All filters are fully streaming — no full-stream buffering.
Meta plugin accuracy: exact for normal text, ±1-2 tokens if long whitespace
sequence spans a chunk boundary.

Also: add 'client' and 'tokens' to default features, add curl to Dockerfile builder stage.
2026-03-13 16:48:31 -03:00

96 lines
2.4 KiB
Rust

#![deny(clippy::all)]
#![deny(unsafe_code)]
#![allow(unused_imports)]
//! Keep library for managing temporary files with compression and metadata.
//!
//! This library provides core functionality for the Keep application, including
//! database operations, compression engines, item services, and plugin systems
//! for metadata and filtering. It supports CLI modes, server APIs, and plugin
//! registration via ctors.
//!
//! # Usage
//!
//! Add to Cargo.toml and use re-exported types:
//! ```toml
//! [dependencies]
//! keep = "0.1"
//! ```
//!
//! ```rust
//! # use keep::Args;
//! # use clap::Parser;
//! let args = Args::parse();
//! ```
//!
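//! # Features
//!
//! - `server`: Enables Axum-based HTTP server.
//! - `gzip`, `lz4`: Built-in compression support.
//! - `magic`: File type detection via libmagic.
//! - `client`: Enables the `client` module.
//! - `tokens`: Enables the `tokenizer` module and the token-based filter and meta plugins.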
// Public module declarations (exposed so tests and other crates can reach them)
pub mod args;
pub mod common;
pub mod compression_engine;
pub mod config;
pub mod db;
pub mod filter_plugin;
pub mod meta_plugin;
pub mod modes;
pub mod services;
#[cfg(feature = "client")]
pub mod client;
#[cfg(feature = "tokens")]
pub mod tokenizer;
// Re-export Args struct for library usage
pub use args::Args;
// Re-export PIPESIZE constant
pub use common::PIPESIZE;
// Import all filter plugins to ensure they register themselves
#[allow(unused_imports)]
use filter_plugin::{grep, head, skip, strip_ansi, tail};
#[cfg(feature = "tokens")]
#[allow(unused_imports)]
use filter_plugin::tokens as token_filters;
use crate::meta_plugin::{
cwd, digest, env, exec, hostname, keep_pid, read_rate, read_time, shell, shell_pid, user,
};
#[cfg(feature = "magic")]
#[allow(unused_imports)]
use crate::meta_plugin::magic_file;
#[cfg(feature = "tokens")]
#[allow(unused_imports)]
use crate::meta_plugin::tokens;
/// Public no-op entry point for callers that want to explicitly "initialize" plugins.
///
/// Plugin registration is expected to happen automatically via `#[ctor]`
/// constructors defined in each plugin module (those modules live outside
/// this file). The `use` statements in `lib.rs` reference the plugin modules
/// so that they are part of the build; plugins behind the `magic` and
/// `tokens` features are only referenced when that feature is enabled.
///
/// This function itself has an empty body: it takes no arguments, returns
/// nothing, and calling it any number of times has no effect. It exists so
/// that callers have a stable, explicit call site to express "plugins must
/// be ready here".
///
/// # Examples
///
/// ```
/// keep::init_plugins();
/// ```
pub fn init_plugins() {
    // Intentionally empty: plugins self-register via #[ctor] on module load.
    // The use-statements in lib.rs guarantee module inclusion.
}
#[cfg(test)]
mod tests;