refactor: decouple meta plugins from DB via SaveMetaFn callback, extract shared utilities

- Add SaveMetaFn callback pattern: meta plugins receive a closure instead of
  &Connection, enabling the same plugin code to work in local, client, and
  server contexts (collect-to-Vec, collect-to-HashMap, or direct DB write)
- Client save now runs meta plugins locally during streaming (smart client
  sets meta=false, server skips its own plugins)
- Add POST /api/item/{id}/update endpoint for re-running plugins on stored
  content without downloading compressed data
- Add client update mode (--update with --meta-plugin flags)
- Extract shared utilities: stream_copy, print_serialized, build_path_table,
  ensure_default_tag to reduce duplication across modes
- Add upsert_tag for idempotent tag addition (INSERT OR IGNORE)
- Add warn logging on save_meta lock failure in BaseMetaPlugin and MetaService
Commit: 5bad7ac7a6 (parent: fdc5f1d744)
Date: 2026-03-14 22:36:59 -03:00
39 changed files with 843 additions and 290 deletions

View File

@@ -1,12 +1,13 @@
use crate::client::{ItemInfo, KeepClient};
use crate::compression_engine::CompressionType;
use crate::config::Settings;
use crate::meta_plugin::SaveMetaFn;
use crate::modes::common::settings_compression_type;
use crate::services::meta_service::MetaService;
use anyhow::Result;
use clap::Command;
use is_terminal::IsTerminal;
use log::debug;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::io::{Read, Write};
use std::sync::{Arc, Mutex};
@@ -14,11 +15,14 @@ use std::sync::{Arc, Mutex};
/// Streaming save mode for client.
///
/// Uses three threads for true streaming with constant memory:
/// - Reader thread: reads stdin, tees to stdout, computes SHA-256,
/// - Reader thread: reads stdin, tees to stdout, runs meta plugins,
/// compresses data, writes to OS pipe
/// - Pipe: zero-copy transfer of compressed bytes between threads
/// - Streamer thread: reads from pipe, streams to server via chunked HTTP
///
/// Meta plugins run on the client side during streaming. Collected metadata
/// is sent to the server via a separate POST after streaming completes.
///
/// Memory usage is O(PIPESIZE) regardless of data size.
pub fn mode(
client: &KeepClient,
@@ -29,33 +33,42 @@ pub fn mode(
) -> Result<(), anyhow::Error> {
debug!("CLIENT_SAVE: Saving item via remote server (streaming)");
if tags.is_empty() {
tags.push("none".to_string());
}
crate::modes::common::ensure_default_tag(tags);
// Determine compression type from settings
let compression_type = settings_compression_type(cmd, settings);
let server_compress = matches!(compression_type, CompressionType::None);
// Shared metadata collection: plugins write here via save_meta closure
let collected_meta: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
let meta_collector = collected_meta.clone();
let save_meta: SaveMetaFn = Arc::new(Mutex::new(move |name: &str, value: &str| {
if let Ok(mut map) = meta_collector.lock() {
map.insert(name.to_string(), value.to_string());
}
}));
// Create MetaService and get plugins (must happen before spawning reader thread)
let meta_service = MetaService::new(save_meta);
let mut plugins = meta_service.get_plugins(cmd, settings);
// Create OS pipe for streaming compressed bytes between threads
let (pipe_reader, pipe_writer) = os_pipe::pipe()?;
// Shared state for reader thread results
let shared = Arc::new(Mutex::new((0u64, String::new())));
let shared_reader = Arc::clone(&shared);
// Reader thread: stdin → tee(stdout) → hash → compress → pipe
// Reader thread: stdin → tee(stdout) → meta plugins → compress → pipe
let compression_type_clone = compression_type.clone();
let reader_handle = std::thread::spawn(move || -> Result<(u64, String)> {
let reader_handle = std::thread::spawn(move || -> Result<u64> {
let stdin = std::io::stdin();
let stdout = std::io::stdout();
let mut stdin_lock = stdin.lock();
let mut stdout_lock = stdout.lock();
let mut hasher = Sha256::new();
let mut total_bytes = 0u64;
let mut buffer = [0u8; 8192];
// Initialize meta plugins
meta_service.initialize_plugins(&mut plugins);
// Wrap pipe writer with appropriate compression
let mut compressor: Box<dyn Write> = match compression_type_clone {
CompressionType::GZip => {
@@ -76,29 +89,23 @@ pub fn mode(
// Tee to stdout
stdout_lock.write_all(&buffer[..n])?;
// Update hash
hasher.update(&buffer[..n]);
// Feed chunk to meta plugins
meta_service.process_chunk(&mut plugins, &buffer[..n]);
total_bytes += n as u64;
// Compress and write to pipe
compressor.write_all(&buffer[..n])?;
}
// Finalize meta plugins (digest, text, tokens produce final output here)
meta_service.finalize_plugins(&mut plugins);
// Explicitly flush and finalize compression before dropping.
// LZ4 FrameEncoder buffers data internally; without explicit flush,
// only the frame header (7 bytes) gets written to the pipe.
compressor.flush()?;
drop(compressor);
// Pipe writer is now dropped (inside compressor), signaling EOF to streamer
let digest = format!("{:x}", hasher.finalize());
// Set shared state for main thread
let mut shared = shared_reader.lock().expect("client save mutex poisoned");
*shared = (total_bytes, digest.clone());
Ok((total_bytes, digest))
Ok(total_bytes)
});
// Streamer thread: reads compressed bytes from pipe → POST to server
@@ -132,28 +139,27 @@ pub fn mode(
.map_err(|e| anyhow::anyhow!("Streamer thread panicked: {:?}", e))??;
// Wait for reader thread (should complete quickly after pipe is drained)
reader_handle
let uncompressed_size = reader_handle
.join()
.map_err(|e| anyhow::anyhow!("Reader thread panicked: {:?}", e))??;
// Read results from shared state
let (uncompressed_size, digest) = {
let shared = shared.lock().expect("client save mutex poisoned");
shared.clone()
};
// Build local metadata and send to server
// Merge plugin-collected metadata with CLI metadata
let mut local_metadata = metadata;
local_metadata.insert("digest_sha256".to_string(), digest);
// Add plugin-collected metadata (digest, hostname, text stats, etc.)
if let Ok(plugin_meta) = collected_meta.lock() {
for (k, v) in plugin_meta.iter() {
local_metadata.entry(k.clone()).or_insert_with(|| v.clone());
}
}
// Add uncompressed_size (always tracked by client)
local_metadata.insert(
"uncompressed_size".to_string(),
uncompressed_size.to_string(),
);
// Record client compression type so the client can decompress on retrieval.
// When compress=false, the server stores the blob as-is with compression=None.
// Without this metadata, the client would get compressed bytes back but think
// they're uncompressed.
if !matches!(compression_type, CompressionType::None) {
local_metadata.insert(
"_client_compression".to_string(),
@@ -161,13 +167,6 @@ pub fn mode(
);
}
// Add hostname
if let Ok(hostname) = gethostname::gethostname().into_string() {
local_metadata.insert("hostname".to_string(), hostname.clone());
let short = hostname.split('.').next().unwrap_or(&hostname).to_string();
local_metadata.insert("hostname_short".to_string(), short);
}
// Send metadata to server
if !local_metadata.is_empty() {
client.post_metadata(item_info.id, &local_metadata)?;