- Add SaveMetaFn callback pattern: meta plugins receive a closure instead of
&Connection, enabling the same plugin code to work in local, client, and
server contexts (collect-to-Vec, collect-to-HashMap, or direct DB write)
- Client save now runs meta plugins locally during streaming (smart client
sets meta=false, server skips its own plugins)
- Add POST /api/item/{id}/update endpoint for re-running plugins on stored
content without downloading compressed data
- Add client update mode (--update with --meta-plugin flags)
- Extract shared utilities: stream_copy, print_serialized, build_path_table,
ensure_default_tag to reduce duplication across modes
- Add upsert_tag for idempotent tag addition (INSERT OR IGNORE)
- Add warn logging on save_meta lock failure in BaseMetaPlugin and MetaService
297 lines
12 KiB
Rust
297 lines
12 KiB
Rust
use crate::config::Settings;
|
|
use crate::meta_plugin::{MetaPlugin, MetaPluginResponse, MetaPluginType, SaveMetaFn};
|
|
use crate::modes::common::settings_meta_plugin_types;
|
|
use clap::Command;
|
|
use log::{debug, error, warn};
|
|
use std::collections::HashMap;
|
|
|
|
pub struct MetaService {
|
|
save_meta: SaveMetaFn,
|
|
}
|
|
|
|
/// Sentinel plugin used as a placeholder when extracting plugins for parallel
|
|
/// execution. The original plugin is written back immediately after the threads
|
|
/// complete. Never leaks into the DB or visible state.
|
|
struct NullMetaPlugin;
|
|
impl MetaPlugin for NullMetaPlugin {
|
|
fn meta_type(&self) -> MetaPluginType {
|
|
MetaPluginType::Digest
|
|
}
|
|
}
|
|
|
|
fn replace_plugin(plugins: &mut [Box<dyn MetaPlugin>], i: usize) -> Box<dyn MetaPlugin> {
|
|
std::mem::replace(&mut plugins[i], Box::new(NullMetaPlugin))
|
|
}
|
|
|
|
/// Stores metadata entries from a plugin response via the save_meta callback.
|
|
fn store_plugin_response(response: &MetaPluginResponse, save_meta: &SaveMetaFn) {
|
|
if let Ok(mut f) = save_meta.lock() {
|
|
for meta_data in &response.metadata {
|
|
f(&meta_data.name, &meta_data.value);
|
|
}
|
|
} else {
|
|
warn!(
|
|
"META_SERVICE: save_meta lock poisoned, dropping {} metadata entries",
|
|
response.metadata.len()
|
|
);
|
|
}
|
|
}
|
|
|
|
impl MetaService {
|
|
/// Creates a new MetaService with the given save_meta callback.
|
|
///
|
|
/// All plugins created by this service will share this callback for
|
|
/// persisting metadata. The callback is wrapped in Arc<Mutex<>> so it
|
|
/// can be cloned into parallel-safe plugin threads.
|
|
pub fn new(save_meta: SaveMetaFn) -> Self {
|
|
Self { save_meta }
|
|
}
|
|
|
|
pub fn get_plugins(&self, cmd: &mut Command, settings: &Settings) -> Vec<Box<dyn MetaPlugin>> {
|
|
debug!("META_SERVICE: get_plugins called");
|
|
let meta_plugin_types: Vec<MetaPluginType> = settings_meta_plugin_types(cmd, settings);
|
|
debug!("META_SERVICE: Meta plugin types from settings: {meta_plugin_types:?}");
|
|
|
|
// Create plugins with their configuration and wire save_meta
|
|
let meta_plugins: Vec<Box<dyn MetaPlugin>> = meta_plugin_types
|
|
.iter()
|
|
.filter_map(|meta_plugin_type| {
|
|
debug!("META_SERVICE: Creating plugin: {meta_plugin_type:?}");
|
|
|
|
// Get the plugin name using strum's Display implementation
|
|
let plugin_name = meta_plugin_type.to_string();
|
|
|
|
// Get options and outputs from settings
|
|
let (options, outputs) = if let Some(meta_plugin_configs) = &settings.meta_plugins {
|
|
if let Some(config) = meta_plugin_configs.iter().find(|c| c.name == plugin_name)
|
|
{
|
|
// Convert options and outputs to the appropriate types
|
|
let options: std::collections::HashMap<String, serde_yaml::Value> = config
|
|
.options
|
|
.iter()
|
|
.map(|(k, v)| (k.clone(), v.clone()))
|
|
.collect();
|
|
|
|
let outputs: std::collections::HashMap<String, serde_yaml::Value> = config
|
|
.outputs
|
|
.iter()
|
|
.map(|(k, v)| (k.clone(), serde_yaml::Value::String(v.clone())))
|
|
.collect();
|
|
|
|
(Some(options), Some(outputs))
|
|
} else {
|
|
(None, None)
|
|
}
|
|
} else {
|
|
(None, None)
|
|
};
|
|
|
|
match crate::meta_plugin::get_meta_plugin_with_save(
|
|
meta_plugin_type.clone(),
|
|
options,
|
|
outputs,
|
|
Some(self.save_meta.clone()),
|
|
) {
|
|
Ok(plugin) => Some(plugin),
|
|
Err(e) => {
|
|
log::warn!("META_SERVICE: Failed to create plugin {meta_plugin_type:?}: {e}, skipping");
|
|
None
|
|
}
|
|
}
|
|
})
|
|
.collect();
|
|
|
|
meta_plugins
|
|
}
|
|
|
|
pub fn initialize_plugins(&self, plugins: &mut [Box<dyn MetaPlugin>]) {
|
|
// Check for duplicate output names before initializing plugins
|
|
let mut output_names: std::collections::HashMap<String, Vec<String>> =
|
|
std::collections::HashMap::new();
|
|
|
|
for plugin in plugins.iter() {
|
|
let plugin_name = plugin.meta_type().to_string();
|
|
for (internal_name, output_config) in plugin.outputs() {
|
|
let output_name = match output_config {
|
|
serde_yaml::Value::String(remapped_name) => remapped_name.clone(),
|
|
serde_yaml::Value::Bool(true) => internal_name.clone(),
|
|
serde_yaml::Value::Bool(false) => continue,
|
|
_ => internal_name.clone(),
|
|
};
|
|
|
|
if !matches!(output_config, serde_yaml::Value::Bool(false)) {
|
|
output_names
|
|
.entry(output_name)
|
|
.or_default()
|
|
.push(plugin_name.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
for (output_name, plugin_names) in &output_names {
|
|
if plugin_names.len() > 1 {
|
|
log::warn!(
|
|
"META_SERVICE: Output name '{}' is provided by multiple plugins: {}",
|
|
output_name,
|
|
plugin_names.join(", ")
|
|
);
|
|
}
|
|
}
|
|
|
|
// Partition into parallel-safe and sequential indices
|
|
let (parallel_idx, sequential_idx): (Vec<usize>, Vec<usize>) = plugins
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|(_, p)| !p.is_finalized())
|
|
.map(|(i, _)| i)
|
|
.partition(|&i| plugins[i].parallel_safe());
|
|
|
|
// Run parallel-safe plugins concurrently
|
|
if !parallel_idx.is_empty() {
|
|
// Extract plugins by unique index into a flat Vec indexed by position
|
|
let mut parallel_plugins: Vec<Box<dyn MetaPlugin>> =
|
|
Vec::with_capacity(parallel_idx.len());
|
|
for &i in ¶llel_idx {
|
|
parallel_plugins.push(replace_plugin(plugins, i));
|
|
}
|
|
|
|
let (results, panicked): (Vec<(usize, MetaPluginResponse)>, Vec<usize>) =
|
|
std::thread::scope(|s| {
|
|
let handles: Vec<_> = parallel_plugins
|
|
.iter_mut()
|
|
.map(|plugin| s.spawn(move || plugin.initialize()))
|
|
.collect();
|
|
let mut results = Vec::with_capacity(handles.len());
|
|
let mut panicked = Vec::new();
|
|
for (j, handle) in handles.into_iter().enumerate() {
|
|
match handle.join() {
|
|
Ok(response) => results.push((j, response)),
|
|
Err(e) => {
|
|
error!("META_SERVICE: Plugin panicked during initialize: {e:?}");
|
|
panicked.push(j);
|
|
}
|
|
}
|
|
}
|
|
(results, panicked)
|
|
});
|
|
|
|
for (j, response) in results {
|
|
store_plugin_response(&response, &self.save_meta);
|
|
let mut plugin = replace_plugin(&mut parallel_plugins, j);
|
|
if response.is_finalized {
|
|
plugin.set_finalized(true);
|
|
}
|
|
plugins[parallel_idx[j]] = plugin;
|
|
}
|
|
for j in panicked {
|
|
let mut plugin = replace_plugin(&mut parallel_plugins, j);
|
|
plugin.set_finalized(true);
|
|
plugins[parallel_idx[j]] = plugin;
|
|
}
|
|
}
|
|
|
|
// Run sequential plugins
|
|
for &i in &sequential_idx {
|
|
let response = plugins[i].initialize();
|
|
store_plugin_response(&response, &self.save_meta);
|
|
if response.is_finalized {
|
|
plugins[i].set_finalized(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn process_chunk(&self, plugins: &mut [Box<dyn MetaPlugin>], chunk: &[u8]) {
|
|
// Partition non-finalized plugins by parallel_safe
|
|
let (parallel_idx, sequential_idx): (Vec<usize>, Vec<usize>) = plugins
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|(_, p)| !p.is_finalized())
|
|
.map(|(i, _)| i)
|
|
.partition(|&i| plugins[i].parallel_safe());
|
|
|
|
// Run parallel-safe plugins concurrently on this chunk
|
|
if !parallel_idx.is_empty() {
|
|
let mut parallel_plugins: Vec<Box<dyn MetaPlugin>> =
|
|
Vec::with_capacity(parallel_idx.len());
|
|
for &i in ¶llel_idx {
|
|
parallel_plugins.push(replace_plugin(plugins, i));
|
|
}
|
|
|
|
let (results, panicked): (Vec<(usize, MetaPluginResponse)>, Vec<usize>) =
|
|
std::thread::scope(|s| {
|
|
let handles: Vec<_> = parallel_plugins
|
|
.iter_mut()
|
|
.map(|plugin| s.spawn(move || plugin.update(chunk)))
|
|
.collect();
|
|
let mut results = Vec::with_capacity(handles.len());
|
|
let mut panicked = Vec::new();
|
|
for (j, handle) in handles.into_iter().enumerate() {
|
|
match handle.join() {
|
|
Ok(response) => results.push((j, response)),
|
|
Err(e) => {
|
|
error!("META_SERVICE: Plugin panicked during update: {e:?}");
|
|
panicked.push(j);
|
|
}
|
|
}
|
|
}
|
|
(results, panicked)
|
|
});
|
|
|
|
for (j, response) in results {
|
|
store_plugin_response(&response, &self.save_meta);
|
|
let mut plugin = replace_plugin(&mut parallel_plugins, j);
|
|
if response.is_finalized {
|
|
plugin.set_finalized(true);
|
|
}
|
|
plugins[parallel_idx[j]] = plugin;
|
|
}
|
|
for j in panicked {
|
|
let mut plugin = replace_plugin(&mut parallel_plugins, j);
|
|
plugin.set_finalized(true);
|
|
plugins[parallel_idx[j]] = plugin;
|
|
}
|
|
}
|
|
|
|
// Run sequential plugins
|
|
for &i in &sequential_idx {
|
|
let response = plugins[i].update(chunk);
|
|
store_plugin_response(&response, &self.save_meta);
|
|
if response.is_finalized {
|
|
plugins[i].set_finalized(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn finalize_plugins(&self, plugins: &mut [Box<dyn MetaPlugin>]) {
|
|
for meta_plugin in plugins.iter_mut() {
|
|
if meta_plugin.is_finalized() {
|
|
continue;
|
|
}
|
|
|
|
let response = meta_plugin.finalize();
|
|
store_plugin_response(&response, &self.save_meta);
|
|
|
|
if response.is_finalized {
|
|
meta_plugin.set_finalized(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Collects initial metadata from environment variables and hostname.
|
|
pub fn collect_initial_meta(&self) -> HashMap<String, String> {
|
|
Self::collect_initial_meta_static()
|
|
}
|
|
|
|
/// Static version of collect_initial_meta for use without a MetaService instance.
|
|
pub fn collect_initial_meta_static() -> HashMap<String, String> {
|
|
let mut item_meta: HashMap<String, String> = crate::modes::common::get_meta_from_env();
|
|
|
|
if let Ok(hostname) = gethostname::gethostname().into_string()
|
|
&& !item_meta.contains_key("hostname")
|
|
{
|
|
item_meta.insert("hostname".to_string(), hostname);
|
|
}
|
|
item_meta
|
|
}
|
|
}
|