- Add SaveMetaFn callback pattern: meta plugins receive a closure instead of
&Connection, enabling the same plugin code to work in local, client, and
server contexts (collect-to-Vec, collect-to-HashMap, or direct DB write)
- Client save now runs meta plugins locally during streaming (smart client
sets meta=false, server skips its own plugins)
- Add POST /api/item/{id}/update endpoint for re-running plugins on stored
content without downloading compressed data
- Add client update mode (--update with --meta-plugin flags)
- Extract shared utilities: stream_copy, print_serialized, build_path_table,
ensure_default_tag to reduce duplication across modes
- Add upsert_tag for idempotent tag addition (INSERT OR IGNORE)
- Add warn logging on save_meta lock failure in BaseMetaPlugin and MetaService
453 lines
13 KiB
Rust
453 lines
13 KiB
Rust
#[cfg(feature = "magic")]
|
|
use magic::{Cookie, CookieFlags};
|
|
#[cfg(not(feature = "magic"))]
|
|
use std::process::{Command, Stdio};
|
|
|
|
use std::io::{self, Write};
|
|
use std::path::Path;
|
|
|
|
use crate::meta_plugin::{
|
|
BaseMetaPlugin, MetaData, MetaPlugin, MetaPluginResponse, MetaPluginType,
|
|
process_metadata_outputs,
|
|
};
|
|
|
|
// Per-thread libmagic cookie. The slot starts out as `None` and is filled in
// the first time a thread performs a lookup, so every thread ends up with its
// own independent `Cookie`. Libmagic documents that separate cookies can be
// used from different threads concurrently without synchronization. Keeping
// the cookie in `thread_local!` storage also avoids an `unsafe impl Send`,
// since that storage is inherently !Send.
#[cfg(feature = "magic")]
thread_local! {
    static MAGIC_COOKIE: std::cell::RefCell<Option<Cookie>> =
        const { std::cell::RefCell::new(None) };
}
|
|
|
|
/// Meta plugin that classifies buffered content through libmagic.
///
/// Only compiled when the `magic` feature is enabled.
#[cfg(feature = "magic")]
#[derive(Debug)]
pub struct MagicFileMetaPluginImpl {
    /// Bytes gathered from `update` calls; never grows past `max_buffer_size`.
    buffer: Vec<u8>,
    /// Upper bound on how many bytes are kept for detection.
    max_buffer_size: usize,
    /// Set once detection has run and its metadata has been handed out.
    is_finalized: bool,
    /// Shared plugin state: options, outputs and the save-meta callback.
    base: BaseMetaPlugin,
}
|
|
|
|
#[cfg(feature = "magic")]
|
|
impl MagicFileMetaPluginImpl {
|
|
pub fn new(
|
|
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
) -> MagicFileMetaPluginImpl {
|
|
let mut base = BaseMetaPlugin::new();
|
|
|
|
// Set default outputs
|
|
let default_outputs = &["mime_type", "mime_encoding", "file_type"];
|
|
base.initialize_plugin(default_outputs, &options, &outputs);
|
|
|
|
// Get max_buffer_size from options, default to PIPESIZE
|
|
let max_buffer_size = base
|
|
.options
|
|
.get("max_buffer_size")
|
|
.and_then(|v| v.as_u64())
|
|
.unwrap_or(crate::common::PIPESIZE as u64) as usize;
|
|
|
|
MagicFileMetaPluginImpl {
|
|
buffer: Vec::new(),
|
|
max_buffer_size,
|
|
is_finalized: false,
|
|
base,
|
|
}
|
|
}
|
|
|
|
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
|
|
MAGIC_COOKIE.with(|cell| {
|
|
// Lazy init: create cookie on first access per thread
|
|
{
|
|
let mut opt = cell.borrow_mut();
|
|
if opt.is_none() {
|
|
let cookie = Cookie::open(CookieFlags::default())
|
|
.map_err(|e| io::Error::other(format!("Failed to open magic: {e}")))?;
|
|
cookie.load(&[] as &[&Path]).map_err(|e| {
|
|
io::Error::other(format!("Failed to load magic database: {e}"))
|
|
})?;
|
|
*opt = Some(cookie);
|
|
}
|
|
}
|
|
|
|
let cookie_ref = cell.borrow();
|
|
let cookie = cookie_ref.as_ref().expect("cookie initialized above");
|
|
|
|
cookie
|
|
.set_flags(flags)
|
|
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;
|
|
|
|
let result = cookie
|
|
.buffer(&self.buffer)
|
|
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;
|
|
|
|
Ok(result.trim().to_string())
|
|
})
|
|
}
|
|
|
|
fn process_magic_types(&self) -> Vec<MetaData> {
|
|
let mut metadata = Vec::new();
|
|
|
|
let types_to_process = [
|
|
("mime_type", CookieFlags::MIME_TYPE),
|
|
("mime_encoding", CookieFlags::MIME_ENCODING),
|
|
("file_type", CookieFlags::empty()),
|
|
];
|
|
|
|
for (name, flags) in types_to_process.iter() {
|
|
if let Ok(result) = self.get_magic_result(*flags)
|
|
&& !result.is_empty()
|
|
&& let Some(meta_data) = process_metadata_outputs(
|
|
name,
|
|
serde_yaml::Value::String(result),
|
|
self.base.outputs(),
|
|
)
|
|
{
|
|
metadata.push(meta_data);
|
|
}
|
|
}
|
|
|
|
metadata
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "magic")]
impl MetaPlugin for MagicFileMetaPluginImpl {
    /// Reports whether detection has already run.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Overrides the finalized flag.
    fn set_finalized(&mut self, finalized: bool) {
        self.is_finalized = finalized;
    }

    /// Hands the save-meta callback to the shared base state.
    fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
        self.base.set_save_meta(save_meta);
    }

    /// Forwards a name/value pair to the shared base state.
    fn save_meta(&self, name: &str, value: &str) {
        self.base.save_meta(name, value);
    }

    /// Produces an empty, non-final response.
    fn initialize(&mut self) -> MetaPluginResponse {
        // Nothing to prepare here: the libmagic cookie is created lazily in
        // the thread-local on first use.
        MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: false,
        }
    }

    /// Buffers up to `max_buffer_size` bytes of `data` and runs detection as
    /// soon as the buffer is full.
    ///
    /// Returns the detected metadata together with `is_finalized: true` on the
    /// call that fills the buffer; every other call returns no metadata.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        if self.is_finalized {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        if room == 0 {
            // No space left to buffer into; keep waiting for `finalize`.
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let take = data.len().min(room);
        self.buffer.extend_from_slice(&data[..take]);

        if self.buffer.len() < self.max_buffer_size {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let metadata = self.process_magic_types();
        self.is_finalized = true;
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Runs detection on whatever has been buffered, unless it already ran.
    fn finalize(&mut self) -> MetaPluginResponse {
        let metadata = if self.is_finalized {
            Vec::new()
        } else {
            let detected = self.process_magic_types();
            self.is_finalized = true;
            detected
        };

        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Identifies this plugin as `MetaPluginType::MagicFile`.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Read access to the configured outputs.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.outputs()
    }

    /// Mutable access to the configured outputs; always `Ok`.
    fn outputs_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.outputs_mut())
    }

    /// Names of the outputs this plugin emits by default.
    fn default_outputs(&self) -> Vec<String> {
        ["mime_type", "mime_encoding", "file_type"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }

    /// Read access to the configured options.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.options()
    }

    /// Mutable access to the configured options; always `Ok`.
    fn options_mut(
        &mut self,
    ) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
        Ok(self.base.options_mut())
    }

    /// Always reports `true`.
    fn parallel_safe(&self) -> bool {
        true
    }
}
|
|
|
|
#[cfg(feature = "magic")]
|
|
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
#[derive(Debug)]
|
|
pub struct FallbackMagicFileMetaPlugin {
|
|
buffer: Vec<u8>,
|
|
max_buffer_size: usize,
|
|
is_finalized: bool,
|
|
base: BaseMetaPlugin,
|
|
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
impl FallbackMagicFileMetaPlugin {
|
|
pub fn new(
|
|
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
) -> Self {
|
|
let mut base = BaseMetaPlugin::new();
|
|
let default_outputs = &["mime_type", "mime_encoding", "file_type"];
|
|
base.initialize_plugin(default_outputs, &options, &outputs);
|
|
|
|
let max_buffer_size = base
|
|
.options
|
|
.get("max_buffer_size")
|
|
.and_then(|v| v.as_u64())
|
|
.unwrap_or(crate::common::PIPESIZE as u64) as usize;
|
|
|
|
Self {
|
|
buffer: Vec::new(),
|
|
max_buffer_size,
|
|
is_finalized: false,
|
|
base,
|
|
}
|
|
}
|
|
|
|
fn run_file_command(&self, args: &[&str]) -> Option<String> {
|
|
let output = Command::new("file")
|
|
.args(args)
|
|
.arg("-")
|
|
.stdin(Stdio::piped())
|
|
.stdout(Stdio::piped())
|
|
.spawn()
|
|
.and_then(|mut child| {
|
|
if let Some(mut stdin) = child.stdin.take() {
|
|
let _ = stdin.write_all(&self.buffer);
|
|
}
|
|
child.wait_with_output()
|
|
});
|
|
|
|
output
|
|
.ok()
|
|
.map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
|
|
}
|
|
|
|
fn detect_type(&self) -> Vec<MetaData> {
|
|
let mut metadata = Vec::new();
|
|
|
|
// Get mime_type and mime_encoding via --mime
|
|
if let Some(mime_line) = self.run_file_command(&["--brief", "--mime"]) {
|
|
// Format: "text/plain; charset=us-ascii"
|
|
if let Some((mime_type, rest)) = mime_line.split_once(';') {
|
|
let mime_type = mime_type.trim().to_string();
|
|
let mime_encoding = rest
|
|
.trim()
|
|
.strip_prefix("charset=")
|
|
.unwrap_or("binary")
|
|
.to_string();
|
|
|
|
if let Some(meta_data) = process_metadata_outputs(
|
|
"mime_type",
|
|
serde_yaml::Value::String(mime_type),
|
|
self.base.outputs(),
|
|
) {
|
|
metadata.push(meta_data);
|
|
}
|
|
if let Some(meta_data) = process_metadata_outputs(
|
|
"mime_encoding",
|
|
serde_yaml::Value::String(mime_encoding),
|
|
self.base.outputs(),
|
|
) {
|
|
metadata.push(meta_data);
|
|
}
|
|
} else {
|
|
// No charset, just mime type
|
|
if let Some(meta_data) = process_metadata_outputs(
|
|
"mime_type",
|
|
serde_yaml::Value::String(mime_line),
|
|
self.base.outputs(),
|
|
) {
|
|
metadata.push(meta_data);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Get human-readable file type via --brief
|
|
if let Some(file_type) = self.run_file_command(&["--brief"])
|
|
&& !file_type.is_empty()
|
|
&& let Some(meta_data) = process_metadata_outputs(
|
|
"file_type",
|
|
serde_yaml::Value::String(file_type),
|
|
self.base.outputs(),
|
|
)
|
|
{
|
|
metadata.push(meta_data);
|
|
}
|
|
|
|
metadata
|
|
}
|
|
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
impl MetaPlugin for FallbackMagicFileMetaPlugin {
|
|
fn is_finalized(&self) -> bool {
|
|
self.is_finalized
|
|
}
|
|
|
|
fn set_finalized(&mut self, finalized: bool) {
|
|
self.is_finalized = finalized;
|
|
}
|
|
|
|
fn set_save_meta(&mut self, save_meta: crate::meta_plugin::SaveMetaFn) {
|
|
self.base.set_save_meta(save_meta);
|
|
}
|
|
|
|
fn save_meta(&self, name: &str, value: &str) {
|
|
self.base.save_meta(name, value);
|
|
}
|
|
|
|
fn initialize(&mut self) -> MetaPluginResponse {
|
|
MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: false,
|
|
}
|
|
}
|
|
|
|
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
|
|
if self.is_finalized {
|
|
return MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
|
|
let remaining = self.max_buffer_size.saturating_sub(self.buffer.len());
|
|
if remaining > 0 {
|
|
let n = std::cmp::min(data.len(), remaining);
|
|
self.buffer.extend_from_slice(&data[..n]);
|
|
|
|
if self.buffer.len() >= self.max_buffer_size {
|
|
let metadata = self.detect_type();
|
|
self.is_finalized = true;
|
|
return MetaPluginResponse {
|
|
metadata,
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
}
|
|
|
|
MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: false,
|
|
}
|
|
}
|
|
|
|
fn finalize(&mut self) -> MetaPluginResponse {
|
|
if self.is_finalized {
|
|
return MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
self.is_finalized = true;
|
|
MetaPluginResponse {
|
|
metadata: self.detect_type(),
|
|
is_finalized: true,
|
|
}
|
|
}
|
|
|
|
fn meta_type(&self) -> MetaPluginType {
|
|
MetaPluginType::MagicFile
|
|
}
|
|
|
|
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.outputs()
|
|
}
|
|
|
|
fn outputs_mut(
|
|
&mut self,
|
|
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
|
|
Ok(self.base.outputs_mut())
|
|
}
|
|
|
|
fn default_outputs(&self) -> Vec<String> {
|
|
vec![
|
|
"mime_type".to_string(),
|
|
"mime_encoding".to_string(),
|
|
"file_type".to_string(),
|
|
]
|
|
}
|
|
|
|
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.options()
|
|
}
|
|
|
|
fn options_mut(
|
|
&mut self,
|
|
) -> anyhow::Result<&mut std::collections::HashMap<String, serde_yaml::Value>> {
|
|
Ok(self.base.options_mut())
|
|
}
|
|
|
|
fn parallel_safe(&self) -> bool {
|
|
true
|
|
}
|
|
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin;
|
|
|
|
use crate::meta_plugin::register_meta_plugin;
|
|
|
|
#[ctor::ctor]
|
|
fn register_magic_file_plugin() {
|
|
register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
|
|
Box::new(MagicFileMetaPlugin::new(options, outputs))
|
|
})
|
|
.expect("Failed to register MagicFileMetaPlugin");
|
|
}
|