feat: plugin-declared parallel execution, switch to env_logger, update deps

Parallel execution (opt-in via MetaPlugin::parallel_safe):
- Add Send bound to MetaPlugin, parallel_safe() method (default false)
- Override to true in digest, tokens, exec, magic_file plugins
- MetaService: std::thread::scope for initialize_plugins and process_chunk
- Extract plugins via NullMetaPlugin sentinel + std::mem::replace (no unsafe)
- Panic tracking: join errors logged; the panicked plugin is moved back into
  its slot (replacing the NullMetaPlugin sentinel) and marked finalized
- MetaPluginExec: Box<dyn Write> -> Box<dyn Write + Send>
- SendCookie wrapper for libmagic Cookie with unsafe impl Send

Logging (stderrlog -> env_logger):
- Custom format: [SSSSSS.mmm] LEVEL [module:] message (time-since-start ms)
- Default level: Warn (matches previous behavior)
- -v: Debug, -vv+: Trace, -q: off
- -vv+ shows module path

Maintenance:
- Bump deps: thiserror 2.0, config 0.15, dns-lookup 3.0, lz4_flex 0.12,
  ringbuf 0.4, rand 0.9, lazy_static 1.5, env_logger 0.11
- Update Cargo.lock (186 transitive packages)
- Clippy fixes: is_multiple_of, to_string_in_format_args, collapsible_if
- Fix double-counting bug in TokensMetaPlugin::update
- Fix schema description using plugin.description()

Co-Authored-By: opencode <noreply@opencode.ai>
This commit is contained in:
2026-03-13 21:49:51 -03:00
parent e7d8a83369
commit a07bb6b350
12 changed files with 1227 additions and 853 deletions

View File

@@ -1,13 +1,27 @@
use crate::config::Settings;
use crate::meta_plugin::{MetaPlugin, MetaPluginType};
use crate::meta_plugin::{MetaPlugin, MetaPluginResponse, MetaPluginType};
use crate::modes::common::settings_meta_plugin_types;
use clap::Command;
use log::debug;
use log::{debug, error};
use rusqlite::Connection;
use std::collections::HashMap;
pub struct MetaService;
/// Sentinel plugin used as a placeholder when extracting plugins for parallel
/// execution. The original plugin is written back immediately after the threads
/// complete. Never leaks into the DB or visible state.
///
/// Only `meta_type` is overridden here; every other `MetaPlugin` method falls
/// back to the trait's default implementation.
struct NullMetaPlugin;
impl MetaPlugin for NullMetaPlugin {
/// Returns `MetaPluginType::Digest`.
///
/// NOTE(review): presumably an arbitrary placeholder value, since the sentinel
/// is not expected to be queried while it occupies a slot -- confirm.
fn meta_type(&self) -> MetaPluginType {
MetaPluginType::Digest
}
}
/// Takes the plugin stored at index `i` out of `plugins`, leaving a
/// `NullMetaPlugin` sentinel in its slot.
///
/// # Arguments
///
/// * `plugins` - Slice of boxed plugins to extract from.
/// * `i` - Index of the slot to vacate.
///
/// # Returns
///
/// The boxed plugin that previously occupied slot `i`.
///
/// # Panics
///
/// Panics if `i` is out of bounds for `plugins`.
fn replace_plugin(plugins: &mut [Box<dyn MetaPlugin>], i: usize) -> Box<dyn MetaPlugin> {
    // Resolve the slot first so an out-of-range index fails before anything else happens.
    let slot = &mut plugins[i];
    let sentinel: Box<dyn MetaPlugin> = Box::new(NullMetaPlugin);
    std::mem::replace(slot, sentinel)
}
impl MetaService {
pub fn new() -> Self {
Self
@@ -77,16 +91,14 @@ impl MetaService {
for plugin in plugins.iter() {
let plugin_name = plugin.meta_type().to_string();
// For each plugin, collect all the output names it might write to
for (internal_name, output_config) in plugin.outputs() {
let output_name = match output_config {
serde_yaml::Value::String(remapped_name) => remapped_name.clone(),
serde_yaml::Value::Bool(true) => internal_name.clone(),
serde_yaml::Value::Bool(false) => continue, // This output is disabled
_ => internal_name.clone(), // Default to internal name for other types
serde_yaml::Value::Bool(false) => continue,
_ => internal_name.clone(),
};
// Only track outputs that will actually be written
if !matches!(output_config, serde_yaml::Value::Bool(false)) {
output_names
.entry(output_name)
@@ -96,7 +108,6 @@ impl MetaService {
}
}
// Print warnings for duplicate output names
for (output_name, plugin_names) in &output_names {
if plugin_names.len() > 1 {
log::warn!(
@@ -107,9 +118,68 @@ impl MetaService {
}
}
for meta_plugin in plugins.iter_mut() {
let response = meta_plugin.initialize();
self.process_plugin_response(conn, item_id, &mut **meta_plugin, response);
// Partition into parallel-safe and sequential indices
let (parallel_idx, sequential_idx): (Vec<usize>, Vec<usize>) = plugins
.iter()
.enumerate()
.filter(|(_, p)| !p.is_finalized())
.map(|(i, _)| i)
.partition(|&i| plugins[i].parallel_safe());
// Run parallel-safe plugins concurrently
if !parallel_idx.is_empty() {
// Extract plugins by unique index into a flat Vec indexed by position
let mut parallel_plugins: Vec<Box<dyn MetaPlugin>> =
Vec::with_capacity(parallel_idx.len());
for &i in &parallel_idx {
parallel_plugins.push(replace_plugin(plugins, i));
}
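// Run initialize() for each extracted plugin on its own scoped thread and
// collect responses / panics by position. Results are stored and the plugins
// written back to their original slots sequentially below (DB writes are serial).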
let (results, panicked): (Vec<(usize, MetaPluginResponse)>, Vec<usize>) =
std::thread::scope(|s| {
let handles: Vec<_> = parallel_plugins
.iter_mut()
.map(|plugin| s.spawn(move || plugin.initialize()))
.collect();
let mut results = Vec::with_capacity(handles.len());
let mut panicked = Vec::new();
for (j, handle) in handles.into_iter().enumerate() {
match handle.join() {
Ok(response) => results.push((j, response)),
Err(e) => {
error!("META_SERVICE: Plugin panicked during initialize: {e:?}");
panicked.push(j);
}
}
}
(results, panicked)
});
for (j, response) in results {
store_plugin_metadata(conn, item_id, &response);
let mut plugin = replace_plugin(&mut parallel_plugins, j);
if response.is_finalized {
plugin.set_finalized(true);
}
plugins[parallel_idx[j]] = plugin;
}
// Panicked plugins: move the original plugin back into its slot (replacing
// the NullMetaPlugin sentinel) and mark it finalized so future phases skip it.
for j in panicked {
let mut plugin = replace_plugin(&mut parallel_plugins, j);
plugin.set_finalized(true);
plugins[parallel_idx[j]] = plugin;
}
}
// Run sequential plugins
for &i in &sequential_idx {
let response = plugins[i].initialize();
store_plugin_metadata(conn, item_id, &response);
if response.is_finalized {
plugins[i].set_finalized(true);
}
}
}
@@ -120,18 +190,64 @@ impl MetaService {
conn: &Connection,
item_id: i64,
) {
for meta_plugin in plugins.iter_mut() {
// Skip plugins that are already finalized
if meta_plugin.is_finalized() {
continue;
// Partition non-finalized plugins by parallel_safe
let (parallel_idx, sequential_idx): (Vec<usize>, Vec<usize>) = plugins
.iter()
.enumerate()
.filter(|(_, p)| !p.is_finalized())
.map(|(i, _)| i)
.partition(|&i| plugins[i].parallel_safe());
// Run parallel-safe plugins concurrently on this chunk
if !parallel_idx.is_empty() {
// Extract plugins by unique index into a flat Vec indexed by position
let mut parallel_plugins: Vec<Box<dyn MetaPlugin>> =
Vec::with_capacity(parallel_idx.len());
for &i in &parallel_idx {
parallel_plugins.push(replace_plugin(plugins, i));
}
let response = meta_plugin.update(chunk);
self.process_plugin_response(conn, item_id, &mut **meta_plugin, response.clone());
let (results, panicked): (Vec<(usize, MetaPluginResponse)>, Vec<usize>) =
std::thread::scope(|s| {
let handles: Vec<_> = parallel_plugins
.iter_mut()
.map(|plugin| s.spawn(move || plugin.update(chunk)))
.collect();
let mut results = Vec::with_capacity(handles.len());
let mut panicked = Vec::new();
for (j, handle) in handles.into_iter().enumerate() {
match handle.join() {
Ok(response) => results.push((j, response)),
Err(e) => {
error!("META_SERVICE: Plugin panicked during update: {e:?}");
panicked.push(j);
}
}
}
(results, panicked)
});
// Set finalized flag if response indicates finalization
for (j, response) in results {
store_plugin_metadata(conn, item_id, &response);
let mut plugin = replace_plugin(&mut parallel_plugins, j);
if response.is_finalized {
plugin.set_finalized(true);
}
plugins[parallel_idx[j]] = plugin;
}
for j in panicked {
let mut plugin = replace_plugin(&mut parallel_plugins, j);
plugin.set_finalized(true);
plugins[parallel_idx[j]] = plugin;
}
}
// Run sequential plugins
for &i in &sequential_idx {
let response = plugins[i].update(chunk);
store_plugin_metadata(conn, item_id, &response);
if response.is_finalized {
meta_plugin.set_finalized(true);
plugins[i].set_finalized(true);
}
}
}
@@ -143,57 +259,19 @@ impl MetaService {
item_id: i64,
) {
for meta_plugin in plugins.iter_mut() {
// Skip plugins that are already finalized
if meta_plugin.is_finalized() {
continue;
}
let response = meta_plugin.finalize();
self.process_plugin_response(conn, item_id, &mut **meta_plugin, response.clone());
store_plugin_metadata(conn, item_id, &response);
// Set finalized flag if response indicates finalization
if response.is_finalized {
meta_plugin.set_finalized(true);
}
}
}
/// Persists every metadata entry carried by a plugin response.
///
/// Each entry is converted into a `crate::db::Meta` row keyed by `item_id` and
/// handed to `crate::db::store_meta`. A failed store is logged at warn level
/// and the remaining entries are still attempted.
///
/// # Arguments
///
/// * `conn` - Database connection.
/// * `item_id` - Item ID the metadata rows are attached to.
/// * `_plugin` - The plugin that produced the response (not used).
/// * `response` - The plugin response; its metadata entries are consumed.
///
/// # Errors
///
/// Never returns an error; individual storage failures are only logged.
fn process_plugin_response(
    &self,
    conn: &Connection,
    item_id: i64,
    _plugin: &mut dyn MetaPlugin,
    response: crate::meta_plugin::MetaPluginResponse,
) {
    // The adaptor is lazy, so each row is built right before it is stored.
    let rows = response.metadata.into_iter().map(|entry| crate::db::Meta {
        id: item_id,
        name: entry.name,
        value: entry.value,
    });

    for row in rows {
        if let Err(e) = crate::db::store_meta(conn, row) {
            log::warn!("META_SERVICE: Failed to store metadata: {e}");
        }
    }
}
/// Collects initial metadata from environment variables and hostname.
///
/// Gathers metadata from `KEEP_META_*` environment variables and adds hostname
@@ -222,6 +300,26 @@ impl MetaService {
}
}
/// Writes each metadata entry of a plugin response to the database.
///
/// The response is only borrowed, so every entry's name and value are cloned
/// into a `crate::db::Meta` row before being passed to `crate::db::store_meta`.
/// A failed store is logged at warn level and does not stop the remaining
/// entries from being attempted.
///
/// # Arguments
///
/// * `conn` - Database connection.
/// * `item_id` - Item ID the metadata rows are attached to.
/// * `response` - The plugin response whose metadata is stored.
fn store_plugin_metadata(conn: &Connection, item_id: i64, response: &MetaPluginResponse) {
    response
        .metadata
        .iter()
        .map(|entry| crate::db::Meta {
            id: item_id,
            name: entry.name.clone(),
            value: entry.value.clone(),
        })
        .for_each(|row| {
            if let Err(e) = crate::db::store_meta(conn, row) {
                log::warn!("META_SERVICE: Failed to store metadata: {e}");
            }
        });
}
impl Default for MetaService {
/// Provides a default `MetaService` instance.
///