refactor: decouple meta plugins from DB via SaveMetaFn callback, extract shared utilities
- Add SaveMetaFn callback pattern: meta plugins receive a closure instead of
&Connection, enabling the same plugin code to work in local, client, and
server contexts (collect-to-Vec, collect-to-HashMap, or direct DB write)
- Client save now runs meta plugins locally during streaming (the smart client
sets meta=false so that the server skips its own plugins)
- Add POST /api/item/{id}/update endpoint for re-running plugins on stored
content without downloading compressed data
- Add client update mode (--update with --meta-plugin flags)
- Extract shared utilities (stream_copy, print_serialized, build_path_table,
and ensure_default_tag) to reduce duplication across modes
- Add upsert_tag for idempotent tag addition (INSERT OR IGNORE)
- Add warn logging on save_meta lock failure in BaseMetaPlugin and MetaService
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
use crate::client::{ItemInfo, KeepClient};
|
||||
use crate::compression_engine::CompressionType;
|
||||
use crate::config::Settings;
|
||||
use crate::meta_plugin::SaveMetaFn;
|
||||
use crate::modes::common::settings_compression_type;
|
||||
use crate::services::meta_service::MetaService;
|
||||
use anyhow::Result;
|
||||
use clap::Command;
|
||||
use is_terminal::IsTerminal;
|
||||
use log::debug;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::collections::HashMap;
|
||||
use std::io::{Read, Write};
|
||||
use std::sync::{Arc, Mutex};
|
||||
@@ -14,11 +15,14 @@ use std::sync::{Arc, Mutex};
|
||||
/// Streaming save mode for client.
|
||||
///
|
||||
/// Uses three threads for true streaming with constant memory:
|
||||
/// - Reader thread: reads stdin, tees to stdout, computes SHA-256,
|
||||
/// - Reader thread: reads stdin, tees to stdout, runs meta plugins,
|
||||
/// compresses data, writes to OS pipe
|
||||
/// - Pipe: zero-copy transfer of compressed bytes between threads
|
||||
/// - Streamer thread: reads from pipe, streams to server via chunked HTTP
|
||||
///
|
||||
/// Meta plugins run on the client side during streaming. Collected metadata
|
||||
/// is sent to the server via a separate POST after streaming completes.
|
||||
///
|
||||
/// Memory usage is O(PIPESIZE) regardless of data size.
|
||||
pub fn mode(
|
||||
client: &KeepClient,
|
||||
@@ -29,33 +33,42 @@ pub fn mode(
|
||||
) -> Result<(), anyhow::Error> {
|
||||
debug!("CLIENT_SAVE: Saving item via remote server (streaming)");
|
||||
|
||||
if tags.is_empty() {
|
||||
tags.push("none".to_string());
|
||||
}
|
||||
crate::modes::common::ensure_default_tag(tags);
|
||||
|
||||
// Determine compression type from settings
|
||||
let compression_type = settings_compression_type(cmd, settings);
|
||||
let server_compress = matches!(compression_type, CompressionType::None);
|
||||
|
||||
// Shared metadata collection: plugins write here via save_meta closure
|
||||
let collected_meta: Arc<Mutex<HashMap<String, String>>> = Arc::new(Mutex::new(HashMap::new()));
|
||||
let meta_collector = collected_meta.clone();
|
||||
let save_meta: SaveMetaFn = Arc::new(Mutex::new(move |name: &str, value: &str| {
|
||||
if let Ok(mut map) = meta_collector.lock() {
|
||||
map.insert(name.to_string(), value.to_string());
|
||||
}
|
||||
}));
|
||||
|
||||
// Create MetaService and get plugins (must happen before spawning reader thread)
|
||||
let meta_service = MetaService::new(save_meta);
|
||||
let mut plugins = meta_service.get_plugins(cmd, settings);
|
||||
|
||||
// Create OS pipe for streaming compressed bytes between threads
|
||||
let (pipe_reader, pipe_writer) = os_pipe::pipe()?;
|
||||
|
||||
// Shared state for reader thread results
|
||||
let shared = Arc::new(Mutex::new((0u64, String::new())));
|
||||
let shared_reader = Arc::clone(&shared);
|
||||
|
||||
// Reader thread: stdin → tee(stdout) → hash → compress → pipe
|
||||
// Reader thread: stdin → tee(stdout) → meta plugins → compress → pipe
|
||||
let compression_type_clone = compression_type.clone();
|
||||
let reader_handle = std::thread::spawn(move || -> Result<(u64, String)> {
|
||||
let reader_handle = std::thread::spawn(move || -> Result<u64> {
|
||||
let stdin = std::io::stdin();
|
||||
let stdout = std::io::stdout();
|
||||
let mut stdin_lock = stdin.lock();
|
||||
let mut stdout_lock = stdout.lock();
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
let mut total_bytes = 0u64;
|
||||
let mut buffer = [0u8; 8192];
|
||||
|
||||
// Initialize meta plugins
|
||||
meta_service.initialize_plugins(&mut plugins);
|
||||
|
||||
// Wrap pipe writer with appropriate compression
|
||||
let mut compressor: Box<dyn Write> = match compression_type_clone {
|
||||
CompressionType::GZip => {
|
||||
@@ -76,29 +89,23 @@ pub fn mode(
|
||||
// Tee to stdout
|
||||
stdout_lock.write_all(&buffer[..n])?;
|
||||
|
||||
// Update hash
|
||||
hasher.update(&buffer[..n]);
|
||||
// Feed chunk to meta plugins
|
||||
meta_service.process_chunk(&mut plugins, &buffer[..n]);
|
||||
|
||||
total_bytes += n as u64;
|
||||
|
||||
// Compress and write to pipe
|
||||
compressor.write_all(&buffer[..n])?;
|
||||
}
|
||||
|
||||
// Finalize meta plugins (digest, text, tokens produce final output here)
|
||||
meta_service.finalize_plugins(&mut plugins);
|
||||
|
||||
// Explicitly flush and finalize compression before dropping.
|
||||
// LZ4 FrameEncoder buffers data internally; without explicit flush,
|
||||
// only the frame header (7 bytes) gets written to the pipe.
|
||||
compressor.flush()?;
|
||||
drop(compressor);
|
||||
|
||||
// Pipe writer is now dropped (inside compressor), signaling EOF to streamer
|
||||
|
||||
let digest = format!("{:x}", hasher.finalize());
|
||||
|
||||
// Set shared state for main thread
|
||||
let mut shared = shared_reader.lock().expect("client save mutex poisoned");
|
||||
*shared = (total_bytes, digest.clone());
|
||||
|
||||
Ok((total_bytes, digest))
|
||||
Ok(total_bytes)
|
||||
});
|
||||
|
||||
// Streamer thread: reads compressed bytes from pipe → POST to server
|
||||
@@ -132,28 +139,27 @@ pub fn mode(
|
||||
.map_err(|e| anyhow::anyhow!("Streamer thread panicked: {:?}", e))??;
|
||||
|
||||
// Wait for reader thread (should complete quickly after pipe is drained)
|
||||
reader_handle
|
||||
let uncompressed_size = reader_handle
|
||||
.join()
|
||||
.map_err(|e| anyhow::anyhow!("Reader thread panicked: {:?}", e))??;
|
||||
|
||||
// Read results from shared state
|
||||
let (uncompressed_size, digest) = {
|
||||
let shared = shared.lock().expect("client save mutex poisoned");
|
||||
shared.clone()
|
||||
};
|
||||
|
||||
// Build local metadata and send to server
|
||||
// Merge plugin-collected metadata with CLI metadata
|
||||
let mut local_metadata = metadata;
|
||||
local_metadata.insert("digest_sha256".to_string(), digest);
|
||||
|
||||
// Add plugin-collected metadata (digest, hostname, text stats, etc.)
|
||||
if let Ok(plugin_meta) = collected_meta.lock() {
|
||||
for (k, v) in plugin_meta.iter() {
|
||||
local_metadata.entry(k.clone()).or_insert_with(|| v.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Add uncompressed_size (always tracked by client)
|
||||
local_metadata.insert(
|
||||
"uncompressed_size".to_string(),
|
||||
uncompressed_size.to_string(),
|
||||
);
|
||||
|
||||
// Record client compression type so the client can decompress on retrieval.
|
||||
// When compress=false, the server stores the blob as-is with compression=None.
|
||||
// Without this metadata, the client would get compressed bytes back but think
|
||||
// they're uncompressed.
|
||||
if !matches!(compression_type, CompressionType::None) {
|
||||
local_metadata.insert(
|
||||
"_client_compression".to_string(),
|
||||
@@ -161,13 +167,6 @@ pub fn mode(
|
||||
);
|
||||
}
|
||||
|
||||
// Add hostname
|
||||
if let Ok(hostname) = gethostname::gethostname().into_string() {
|
||||
local_metadata.insert("hostname".to_string(), hostname.clone());
|
||||
let short = hostname.split('.').next().unwrap_or(&hostname).to_string();
|
||||
local_metadata.insert("hostname_short".to_string(), short);
|
||||
}
|
||||
|
||||
// Send metadata to server
|
||||
if !local_metadata.is_empty() {
|
||||
client.post_metadata(item_info.id, &local_metadata)?;
|
||||
|
||||
Reference in New Issue
Block a user