feat: add plugin schema system, tokenizer cache, and config validation
- Add plugin schema types and runtime discovery for meta/filter plugins
- Rewrite --generate-config to use schema system instead of hardcoded types
- Add Settings::validate_config() for startup validation
- Cache tokenizer instances via static Lazy to avoid repeated BPE loading
- Add split_by_token_iter() and count_bounded() to Tokenizer
- Fix double-counting bug in TokensMetaPlugin when buffer < max_buffer_size
- Eliminate unnecessary allocations in token count methods
- Refactor token filters: remove Option<Tokenizer>, use iterator API
- Fix TailTokensFilter correctness: unbounded buffer instead of ring buffer
- Add encoding option to all token filters
- Add description() to MetaPlugin and FilterPlugin traits
- Fix unused_mut warning in compression engine (feature-gated code)

Co-Authored-By: code-review-bot <noreply@anthropic.com>
This commit is contained in:
@@ -573,4 +573,65 @@ impl Settings {
|
||||
.map(|plugins| plugins.iter().map(|p| p.name.clone()).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Validates the configuration against plugin schemas.
|
||||
///
|
||||
/// Checks that:
|
||||
/// - All configured meta plugin names are valid and registered
|
||||
/// - Required options are present for each meta plugin
|
||||
/// - Compression plugin name (if set) is a valid compression type
|
||||
///
|
||||
/// Returns a list of warning strings. An empty list means the config is valid.
|
||||
pub fn validate_config(&self) -> Vec<String> {
|
||||
use crate::common::schema::gather_meta_plugin_schemas;
|
||||
use crate::compression_engine::CompressionType;
|
||||
use strum::IntoEnumIterator;
|
||||
|
||||
let mut warnings = Vec::new();
|
||||
|
||||
// Validate compression plugin
|
||||
if let Some(ref comp) = self.compression_plugin {
|
||||
let valid_types: Vec<String> =
|
||||
CompressionType::iter().map(|ct| ct.to_string()).collect();
|
||||
if !valid_types.contains(&comp.name) {
|
||||
warnings.push(format!(
|
||||
"Unknown compression_plugin.name: '{}'. Valid types: {}",
|
||||
comp.name,
|
||||
valid_types.join(", ")
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Validate meta plugins
|
||||
if let Some(ref plugins) = self.meta_plugins {
|
||||
let schemas = gather_meta_plugin_schemas();
|
||||
let schema_map: std::collections::HashMap<&str, &crate::common::schema::PluginSchema> =
|
||||
schemas.iter().map(|s| (s.name.as_str(), s)).collect();
|
||||
|
||||
for plugin in plugins {
|
||||
match schema_map.get(plugin.name.as_str()) {
|
||||
Some(schema) => {
|
||||
// Check required options
|
||||
for opt in &schema.options {
|
||||
if opt.required && !plugin.options.contains_key(&opt.name) {
|
||||
warnings.push(format!(
|
||||
"Meta plugin '{}': missing required option '{}'",
|
||||
plugin.name, opt.name
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
warnings.push(format!(
|
||||
"Unknown meta plugin: '{}'. Available: {}",
|
||||
plugin.name,
|
||||
schema_map.keys().copied().collect::<Vec<_>>().join(", ")
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
warnings
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user