Files
keep/src/meta_plugin/magic_file.rs
Andrew Phillips 5bad7ac7a6 refactor: decouple meta plugins from DB via SaveMetaFn callback, extract shared utilities
- Add SaveMetaFn callback pattern: meta plugins receive a closure instead of
  &Connection, enabling the same plugin code to work in local, client, and
  server contexts (collect-to-Vec, collect-to-HashMap, or direct DB write)
- Client save now runs meta plugins locally during streaming (smart client
  sets meta=false, server skips its own plugins)
- Add POST /api/item/{id}/update endpoint for re-running plugins on stored
  content without downloading compressed data
- Add client update mode (--update with --meta-plugin flags)
- Extract shared utilities: stream_copy, print_serialized, build_path_table,
  ensure_default_tag to reduce duplication across modes
- Add upsert_tag for idempotent tag addition (INSERT OR IGNORE)
- Add warn logging on save_meta lock failure in BaseMetaPlugin and MetaService
2026-03-14 22:36:59 -03:00

453 lines
13 KiB
Rust

#[cfg(feature = "magic")]
use magic::{Cookie, CookieFlags};
#[cfg(not(feature = "magic"))]
use std::process::{Command, Stdio};
use std::io::{self, Write};
use std::path::Path;
use crate::meta_plugin::{
BaseMetaPlugin, MetaData, MetaPlugin, MetaPluginResponse, MetaPluginType,
process_metadata_outputs,
};
// Thread-local libmagic cookie. Each thread's slot starts out as `None` and is
// filled lazily the first time that thread asks for a magic result.
//
// Each thread gets its own independent Cookie instance. Libmagic documents that
// separate cookies can be used from different threads concurrently without
// synchronization. Using thread_local! avoids unsafe impl Send since the
// storage is inherently !Send.
//
// The `RefCell` supplies the interior mutability needed to install the cookie
// on first use.
#[cfg(feature = "magic")]
thread_local! {
static MAGIC_COOKIE: std::cell::RefCell<Option<Cookie>> = const { std::cell::RefCell::new(None) };
}
#[cfg(feature = "magic")]
/// Meta plugin that identifies content using libmagic (the `magic` crate).
///
/// Only compiled when the `magic` feature is enabled. It buffers the leading
/// bytes of the stream and reports `mime_type`, `mime_encoding` and
/// `file_type` for them.
#[derive(Debug)]
pub struct MagicFileMetaPluginImpl {
/// Leading bytes of the stream collected so far for detection.
buffer: Vec<u8>,
/// Maximum number of bytes kept in `buffer`; taken from the
/// `max_buffer_size` option, defaulting to `crate::common::PIPESIZE`.
max_buffer_size: usize,
/// Set once detection has run and the metadata has been returned.
is_finalized: bool,
/// Shared plugin state: options, outputs and the save-meta callback.
base: BaseMetaPlugin,
}
#[cfg(feature = "magic")]
impl MagicFileMetaPluginImpl {
    /// Builds the plugin from optional user-supplied `options` and `outputs`.
    ///
    /// Registers the default outputs (`mime_type`, `mime_encoding`,
    /// `file_type`) on the base plugin and reads the detection buffer limit
    /// from the `max_buffer_size` option. When the option is missing or is not
    /// an unsigned integer, `crate::common::PIPESIZE` is used instead.
    pub fn new(
        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
    ) -> MagicFileMetaPluginImpl {
        let mut base = BaseMetaPlugin::new();

        // Set default outputs
        let default_outputs = &["mime_type", "mime_encoding", "file_type"];
        base.initialize_plugin(default_outputs, &options, &outputs);

        // Get max_buffer_size from options, default to PIPESIZE
        let requested = base
            .options
            .get("max_buffer_size")
            .and_then(|v| v.as_u64())
            .unwrap_or(crate::common::PIPESIZE as u64);
        // A bare `as usize` would silently truncate on targets where `usize`
        // is narrower than 64 bits (e.g. 2^32 would become 0 and disable
        // buffering). Saturate so an oversized request means "as much as
        // possible" instead.
        let max_buffer_size = usize::try_from(requested).unwrap_or(usize::MAX);

        MagicFileMetaPluginImpl {
            buffer: Vec::new(),
            max_buffer_size,
            is_finalized: false,
            base,
        }
    }

    /// Runs libmagic over the buffered bytes with the given `flags` and
    /// returns the trimmed description.
    ///
    /// The calling thread's cookie is opened and its default database loaded
    /// on first use; later calls on the same thread reuse it.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` when the cookie cannot be opened, the database
    /// cannot be loaded, the flags cannot be applied, or the buffer cannot be
    /// analyzed.
    fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
        MAGIC_COOKIE.with(|cell| {
            // Lazy init: create cookie on first access per thread. The mutable
            // borrow is confined to this scope so that the shared borrow below
            // cannot conflict with it.
            {
                let mut opt = cell.borrow_mut();
                if opt.is_none() {
                    let cookie = Cookie::open(CookieFlags::default())
                        .map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?;
                    cookie.load(&[] as &[&Path]).map_err(|e| {
                        io::Error::other(format!("Failed to load magic database: {e}"))
                    })?;
                    *opt = Some(cookie);
                }
            }

            let cookie_ref = cell.borrow();
            let cookie = cookie_ref.as_ref().expect("cookie initialized above");

            // The cookie is shared by every query on this thread, so the flags
            // are re-applied before each analysis.
            cookie
                .set_flags(flags)
                .map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
            let result = cookie
                .buffer(&self.buffer)
                .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
            Ok(result.trim().to_string())
        })
    }

    /// Produces the metadata entries for `mime_type`, `mime_encoding` and
    /// `file_type` from the buffered bytes.
    ///
    /// Detection is best-effort: a query that fails, yields an empty string,
    /// or is filtered out by `process_metadata_outputs` is simply omitted
    /// from the result.
    fn process_magic_types(&self) -> Vec<MetaData> {
        let mut metadata = Vec::new();
        let types_to_process = [
            ("mime_type", CookieFlags::MIME_TYPE),
            ("mime_encoding", CookieFlags::MIME_ENCODING),
            ("file_type", CookieFlags::empty()),
        ];
        for (name, flags) in types_to_process.iter() {
            if let Ok(result) = self.get_magic_result(*flags)
                && !result.is_empty()
                && let Some(meta_data) = process_metadata_outputs(
                    name,
                    serde_yaml::Value::String(result),
                    self.base.outputs(),
                )
            {
                metadata.push(meta_data);
            }
        }
        metadata
    }
}
#[cfg(feature = "magic")]
impl MetaPlugin for MagicFileMetaPluginImpl {
    /// Reports whether detection has already run.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Overrides the finalized flag.
    fn set_finalized(&mut self, finalized: bool) {
        self.is_finalized = finalized;
    }

    /// Hands the metadata-saving callback to the shared base plugin.
    fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
        self.base.set_save_meta(save_meta);
    }

    /// Forwards a single name/value pair to the base plugin's save routine.
    fn save_meta(&self, name: &str, value: &str) {
        self.base.save_meta(name, value);
    }

    /// Nothing is prepared eagerly: the per-thread cookie is created on first
    /// use, so this only reports "no metadata yet, not finalized".
    fn initialize(&mut self) -> MetaPluginResponse {
        MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: false,
        }
    }

    /// Appends `data` to the detection buffer, up to `max_buffer_size` bytes.
    ///
    /// As soon as the buffer is full, detection runs and the resulting
    /// metadata is returned with `is_finalized: true`. Calls made after
    /// finalization return an empty, finalized response.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        if self.is_finalized {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        // Guard: no room left means there is nothing to copy or detect.
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        if room == 0 {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let take = data.len().min(room);
        self.buffer.extend_from_slice(&data[..take]);

        // Guard: keep collecting until the buffer reaches its limit.
        if self.buffer.len() < self.max_buffer_size {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let metadata = self.process_magic_types();
        self.is_finalized = true;
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Runs detection on whatever has been buffered, unless it already ran,
    /// in which case an empty, finalized response is returned.
    fn finalize(&mut self) -> MetaPluginResponse {
        let metadata = if self.is_finalized {
            Vec::new()
        } else {
            let detected = self.process_magic_types();
            self.is_finalized = true;
            detected
        };
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Identifies this plugin as the magic-file plugin.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Read-only view of the configured outputs.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.outputs()
    }

    /// Mutable view of the configured outputs; always succeeds.
    fn outputs_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.outputs_mut())
    }

    /// Names of the outputs this plugin produces by default.
    fn default_outputs(&self) -> Vec<String> {
        ["mime_type", "mime_encoding", "file_type"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }

    /// Read-only view of the configured options.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.options()
    }

    /// Mutable view of the configured options; always succeeds.
    fn options_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.options_mut())
    }

    /// This plugin declares itself safe to run in parallel.
    fn parallel_safe(&self) -> bool {
        true
    }
}
// With the `magic` feature enabled, the libmagic-backed implementation is
// exported under the common `MagicFileMetaPlugin` name.
#[cfg(feature = "magic")]
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
#[cfg(not(feature = "magic"))]
/// Meta plugin that identifies content by piping it to the external `file`
/// command.
///
/// Only compiled when the `magic` feature is disabled. It buffers the leading
/// bytes of the stream and reports `mime_type`, `mime_encoding` and
/// `file_type` for them.
#[derive(Debug)]
pub struct FallbackMagicFileMetaPlugin {
/// Leading bytes of the stream collected so far for detection.
buffer: Vec<u8>,
/// Maximum number of bytes kept in `buffer`; taken from the
/// `max_buffer_size` option, defaulting to `crate::common::PIPESIZE`.
max_buffer_size: usize,
/// Set once detection has run and the metadata has been returned.
is_finalized: bool,
/// Shared plugin state: options, outputs and the save-meta callback.
base: BaseMetaPlugin,
}
#[cfg(not(feature = "magic"))]
impl FallbackMagicFileMetaPlugin {
    /// Builds the plugin from optional user-supplied `options` and `outputs`.
    ///
    /// Registers the default outputs (`mime_type`, `mime_encoding`,
    /// `file_type`) on the base plugin and reads the detection buffer limit
    /// from the `max_buffer_size` option. When the option is missing or is not
    /// an unsigned integer, `crate::common::PIPESIZE` is used instead.
    pub fn new(
        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
    ) -> Self {
        let mut base = BaseMetaPlugin::new();
        let default_outputs = &["mime_type", "mime_encoding", "file_type"];
        base.initialize_plugin(default_outputs, &options, &outputs);

        let requested = base
            .options
            .get("max_buffer_size")
            .and_then(|v| v.as_u64())
            .unwrap_or(crate::common::PIPESIZE as u64);
        // A bare `as usize` would silently truncate on targets where `usize`
        // is narrower than 64 bits (e.g. 2^32 would become 0 and disable
        // buffering). Saturate so an oversized request means "as much as
        // possible" instead.
        let max_buffer_size = usize::try_from(requested).unwrap_or(usize::MAX);

        Self {
            buffer: Vec::new(),
            max_buffer_size,
            is_finalized: false,
            base,
        }
    }

    /// Runs `file <args> -` with the buffered bytes on stdin and returns its
    /// trimmed standard output.
    ///
    /// Returns `None` when the command cannot be spawned or waited on, when
    /// it exits unsuccessfully, or when it prints nothing, so callers never
    /// see an empty or failed result as a valid answer.
    fn run_file_command(&self, args: &[&str]) -> Option<String> {
        let mut child = Command::new("file")
            .args(args)
            .arg("-")
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .ok()?;

        if let Some(mut stdin) = child.stdin.take() {
            // Best-effort write: `file` may stop reading before the whole
            // buffer is delivered, so a write error here is not fatal. The
            // handle is dropped at the end of this scope, signalling EOF.
            let _ = stdin.write_all(&self.buffer);
        }

        let output = child.wait_with_output().ok()?;
        if !output.status.success() {
            return None;
        }

        let text = String::from_utf8_lossy(&output.stdout).trim().to_string();
        (!text.is_empty()).then_some(text)
    }

    /// Appends the metadata entry for output `name` to `metadata`, skipping
    /// empty values and outputs filtered out by `process_metadata_outputs`.
    fn push_output(&self, metadata: &mut Vec<MetaData>, name: &'static str, value: &str) {
        if value.is_empty() {
            return;
        }
        if let Some(meta_data) = process_metadata_outputs(
            name,
            serde_yaml::Value::String(value.to_string()),
            self.base.outputs(),
        ) {
            metadata.push(meta_data);
        }
    }

    /// Produces the metadata entries for `mime_type`, `mime_encoding` and
    /// `file_type` from the buffered bytes.
    ///
    /// Detection is best-effort: any value that cannot be determined is
    /// omitted from the result.
    fn detect_type(&self) -> Vec<MetaData> {
        let mut metadata = Vec::new();

        // Get mime_type and mime_encoding via --mime
        if let Some(mime_line) = self.run_file_command(&["--brief", "--mime"]) {
            match mime_line.split_once(';') {
                // Format: "text/plain; charset=us-ascii"
                Some((mime_type, rest)) => {
                    let mime_encoding = rest
                        .trim()
                        .strip_prefix("charset=")
                        .unwrap_or("binary");
                    self.push_output(&mut metadata, "mime_type", mime_type.trim());
                    self.push_output(&mut metadata, "mime_encoding", mime_encoding);
                }
                // No charset, just mime type
                None => self.push_output(&mut metadata, "mime_type", &mime_line),
            }
        }

        // Get human-readable file type via --brief
        if let Some(file_type) = self.run_file_command(&["--brief"]) {
            self.push_output(&mut metadata, "file_type", &file_type);
        }

        metadata
    }
}
#[cfg(not(feature = "magic"))]
impl MetaPlugin for FallbackMagicFileMetaPlugin {
    /// Reports whether detection has already run.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Overrides the finalized flag.
    fn set_finalized(&mut self, finalized: bool) {
        self.is_finalized = finalized;
    }

    /// Hands the metadata-saving callback to the shared base plugin.
    fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
        self.base.set_save_meta(save_meta);
    }

    /// Forwards a single name/value pair to the base plugin's save routine.
    fn save_meta(&self, name: &str, value: &str) {
        self.base.save_meta(name, value);
    }

    /// No setup is required; reports "no metadata yet, not finalized".
    fn initialize(&mut self) -> MetaPluginResponse {
        MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: false,
        }
    }

    /// Appends `data` to the detection buffer, up to `max_buffer_size` bytes.
    ///
    /// As soon as the buffer is full, detection runs and the resulting
    /// metadata is returned with `is_finalized: true`. Calls made after
    /// finalization return an empty, finalized response.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        if self.is_finalized {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        // Guard: no room left means there is nothing to copy or detect.
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        if room == 0 {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let take = data.len().min(room);
        self.buffer.extend_from_slice(&data[..take]);

        // Guard: keep collecting until the buffer reaches its limit.
        if self.buffer.len() < self.max_buffer_size {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let metadata = self.detect_type();
        self.is_finalized = true;
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Runs detection on whatever has been buffered, unless it already ran,
    /// in which case an empty, finalized response is returned.
    fn finalize(&mut self) -> MetaPluginResponse {
        let metadata = if self.is_finalized {
            Vec::new()
        } else {
            // The flag is raised before detection runs.
            self.is_finalized = true;
            self.detect_type()
        };
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Identifies this plugin as the magic-file plugin.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Read-only view of the configured outputs.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.outputs()
    }

    /// Mutable view of the configured outputs; always succeeds.
    fn outputs_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.outputs_mut())
    }

    /// Names of the outputs this plugin produces by default.
    fn default_outputs(&self) -> Vec<String> {
        ["mime_type", "mime_encoding", "file_type"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }

    /// Read-only view of the configured options.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.options()
    }

    /// Mutable view of the configured options; always succeeds.
    fn options_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.options_mut())
    }

    /// This plugin declares itself safe to run in parallel.
    fn parallel_safe(&self) -> bool {
        true
    }
}
// Without the `magic` feature, the `file`-command fallback is exported under
// the common `MagicFileMetaPlugin` name.
#[cfg(not(feature = "magic"))]
pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin;
use crate::meta_plugin::register_meta_plugin;
/// Registers the magic-file plugin factory under `MetaPluginType::MagicFile`.
///
/// Marked `#[ctor::ctor]`, so it is run as a constructor function rather than
/// being called explicitly. Panics if registration reports an error.
#[ctor::ctor]
fn register_magic_file_plugin() {
    // The factory builds whichever implementation `MagicFileMetaPlugin`
    // resolves to for the active feature set.
    let registration = register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
        Box::new(MagicFileMetaPlugin::new(options, outputs))
    });
    registration.expect("Failed to register MagicFileMetaPlugin");
}