feat: add LLM token counting meta plugin and token filters

Add tiktoken-based token counting via new 'tokens' feature flag.

New components:
- Shared tokenizer module wrapping tiktoken CoreBPE (cl100k_base, o200k_base)
- TokensMetaPlugin: streaming token counter, tokenizes each chunk independently
- head_tokens(N): stream first N tokens, split at exact boundary when mid-chunk
- skip_tokens(N): skip first N tokens, stream the rest
- tail_tokens(N): bounded ring buffer (~16KB), outputs last N tokens at finalize

All filters are fully streaming — no full-stream buffering.
Meta plugin accuracy: exact for normal text; because each chunk is tokenized
independently, the count may be off by ±1-2 tokens if a long whitespace
sequence spans a chunk boundary.

Also: add 'client' and 'tokens' to the default features, and add curl to the Dockerfile builder stage.
This commit is contained in:
2026-03-13 16:48:31 -03:00
parent e672ec751e
commit 914190e119
9 changed files with 1128 additions and 3 deletions

View File

@@ -75,10 +75,11 @@ ureq = { version = "3", features = ["json"], optional = true }
os_pipe = { version = "1", optional = true }
axum-server = { version = "0.8", features = ["tls-rustls"], optional = true }
jsonwebtoken = { version = "10", optional = true, features = ["aws_lc_rs"] }
tiktoken-rs = { version = "0.9", optional = true }
[features]
# Default features include core compression engines, the client, and token counting (swagger UI is not in the default set)
default = ["magic", "lz4", "gzip"]
default = ["magic", "lz4", "gzip", "client", "tokens"]
# Full
#default = ["server", "magic", "lz4", "swagger"]
@@ -113,6 +114,9 @@ client = ["dep:ureq", "dep:os_pipe"]
# TLS feature (HTTPS server support)
tls = ["dep:axum-server"]
# Token counting feature (LLM token support via tiktoken)
tokens = ["dep:tiktoken-rs"]
[dev-dependencies]
tempfile = "3.3.0"
rand = "0.8.5"