Critical bug fixes:
- save_item now returns real Item from database, not a hardcoded fake
- AsyncDataService::save() reuses self.sync_service instead of creating redundant instance
- GenerateStatus trait signature mismatch fixed (CLI/API decoupling)
Performance improvements (pipe path untouched):
- CompressionEngine::open() returns Box<dyn Read + Send> enabling true streaming
- mode_get eliminates triple full-file read (was sampling then re-reading entire file)
- FilteringReader adds fast-path bypass when no filters, pre-allocates temp buffer
- text.rs meta plugin processes &[u8] slice directly, eliminates data.to_vec() clone
API correctness:
- Tag parse errors now return 400 instead of being silently discarded
- compute_diff uses similar crate (LCS-based) instead of naive positional comparison
Cleanup:
- Modernize string formatting (inline captures, e.g. format!("{x}")) across codebase
- Remove redundant DB query in get mode
- Derive Debug/ToSchema on public types
- Delete placeholder test files with no real assertions
- Extract parse_comma_tags utility function
441 lines
13 KiB
Rust
441 lines
13 KiB
Rust
#[cfg(feature = "magic")]
|
|
use magic::{Cookie, CookieFlags};
|
|
#[cfg(not(feature = "magic"))]
|
|
use std::process::{Command, Stdio};
|
|
|
|
use log::debug;
|
|
use std::io::{self, Write};
|
|
use std::path::Path;
|
|
|
|
use crate::meta_plugin::{
|
|
BaseMetaPlugin, MetaData, MetaPlugin, MetaPluginResponse, MetaPluginType,
|
|
process_metadata_outputs,
|
|
};
|
|
|
|
/// Meta plugin that identifies buffered content through the `magic` crate.
///
/// Only compiled when the `magic` feature is enabled.
#[cfg(feature = "magic")]
#[derive(Debug)]
pub struct MagicFileMetaPluginImpl {
    /// Bytes collected from `update` calls, never longer than `max_buffer_size`.
    buffer: Vec<u8>,
    /// Upper bound on the number of bytes kept in `buffer`.
    max_buffer_size: usize,
    /// Whether the plugin has stopped accepting data.
    is_finalized: bool,
    /// Magic cookie used for detection; `None` until `initialize` succeeds.
    cookie: Option<Cookie>,
    /// Shared plugin state holding the configured options and outputs.
    base: BaseMetaPlugin,
}
|
|
|
|
#[cfg(feature = "magic")]
impl MagicFileMetaPluginImpl {
    /// Builds a plugin instance from the optional `options` and `outputs` maps.
    ///
    /// The `max_buffer_size` option (read as an unsigned integer) caps how many
    /// bytes are buffered for detection; when it is absent or not an integer
    /// the cap falls back to `crate::common::PIPESIZE`. The magic cookie is not
    /// opened here; `initialize` does that.
    pub fn new(
        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
    ) -> MagicFileMetaPluginImpl {
        let mut base = BaseMetaPlugin::new();

        // Default output names handed to the base plugin together with the
        // caller-supplied configuration.
        base.initialize_plugin(
            &["mime_type", "mime_encoding", "file_type"],
            &options,
            &outputs,
        );

        let max_buffer_size = base
            .options
            .get("max_buffer_size")
            .and_then(|v| v.as_u64())
            .unwrap_or(crate::common::PIPESIZE as u64) as usize;

        Self {
            buffer: Vec::new(),
            max_buffer_size,
            is_finalized: false,
            cookie: None,
            base,
        }
    }

    /// Analyzes the buffered bytes with the given cookie `flags`.
    ///
    /// Returns the trimmed description reported for the buffer.
    ///
    /// # Errors
    ///
    /// Fails when the cookie has not been initialized, when the flags cannot
    /// be applied, or when the buffer analysis itself fails.
    fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
        let cookie = self
            .cookie
            .as_ref()
            .ok_or_else(|| io::Error::other("Magic cookie not initialized"))?;

        cookie
            .set_flags(flags)
            .map_err(|e| io::Error::other(format!("Failed to set magic flags: {e}")))?;

        let description = cookie
            .buffer(&self.buffer)
            .map_err(|e| io::Error::other(format!("Failed to analyze buffer: {e}")))?;

        // Strip surrounding whitespace from the reported description.
        Ok(description.trim().to_string())
    }

    /// Runs one detection pass per output (`mime_type`, `mime_encoding`,
    /// `file_type`) and collects the metadata entries that
    /// `process_metadata_outputs` yields for them.
    ///
    /// A pass that fails or reports an empty string contributes no entry.
    /// Failures are logged at debug level instead of being dropped silently,
    /// so a missing output can be diagnosed.
    fn process_magic_types(&self) -> Vec<MetaData> {
        let types_to_process = [
            ("mime_type", CookieFlags::MIME_TYPE),
            ("mime_encoding", CookieFlags::MIME_ENCODING),
            ("file_type", CookieFlags::empty()),
        ];

        let mut metadata = Vec::new();

        for (name, flags) in types_to_process.iter() {
            let result = match self.get_magic_result(*flags) {
                Ok(result) => result,
                Err(e) => {
                    debug!("META: MagicFile plugin: failed to detect {name}: {e}");
                    continue;
                }
            };

            if result.is_empty() {
                continue;
            }

            if let Some(meta_data) = process_metadata_outputs(
                name,
                serde_yaml::Value::String(result),
                self.base.outputs(),
            ) {
                metadata.push(meta_data);
            }
        }

        metadata
    }
}
|
|
|
|
#[cfg(feature = "magic")]
impl MetaPlugin for MagicFileMetaPluginImpl {
    /// Reports whether the plugin has stopped accepting data.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Overrides the finalized flag.
    fn set_finalized(&mut self, finalized: bool) {
        self.is_finalized = finalized;
    }

    /// Opens the magic cookie and loads its database.
    ///
    /// On failure the problem is logged at debug level and the plugin is
    /// finalized with no metadata. The internal flag is set as well as the
    /// response field, so later `update`/`finalize` calls return immediately
    /// instead of buffering data that can never be analyzed.
    fn initialize(&mut self) -> MetaPluginResponse {
        let cookie = match Cookie::open(CookieFlags::default()) {
            Ok(cookie) => cookie,
            Err(e) => {
                debug!("META: MagicFile plugin: failed to create cookie: {e}");
                self.is_finalized = true;
                return MetaPluginResponse {
                    metadata: Vec::new(),
                    is_finalized: true,
                };
            }
        };

        // An empty path list is passed; presumably this selects the default
        // magic database (confirm against the `magic` crate version in use).
        if let Err(e) = cookie.load(&[] as &[&Path]) {
            debug!("META: MagicFile plugin: failed to load magic database: {e}");
            self.is_finalized = true;
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        self.cookie = Some(cookie);

        MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: false,
        }
    }

    /// Appends `data` to the internal buffer, up to `max_buffer_size` bytes.
    ///
    /// Once the buffer reaches its cap, detection runs and the plugin
    /// finalizes, returning the detected metadata. Until then, and after
    /// finalization, the response carries no metadata.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        if self.is_finalized {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
        if remaining_capacity == 0 {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        // Copy only as much as still fits; the rest of `data` is ignored.
        let bytes_to_copy = data.len().min(remaining_capacity);
        self.buffer.extend_from_slice(&data[..bytes_to_copy]);

        if self.buffer.len() < self.max_buffer_size {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: false,
            };
        }

        let metadata = self.process_magic_types();
        self.is_finalized = true;
        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Runs detection on whatever has been buffered and finalizes the plugin.
    ///
    /// Returns no metadata when the plugin was already finalized.
    fn finalize(&mut self) -> MetaPluginResponse {
        if self.is_finalized {
            return MetaPluginResponse {
                metadata: Vec::new(),
                is_finalized: true,
            };
        }

        let metadata = self.process_magic_types();
        self.is_finalized = true;

        MetaPluginResponse {
            metadata,
            is_finalized: true,
        }
    }

    /// Identifies this plugin as `MetaPluginType::MagicFile`.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Configured outputs, as stored in the base plugin.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.outputs()
    }

    /// Mutable access to the configured outputs.
    fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        self.base.outputs_mut()
    }

    /// Names of the outputs this plugin produces by default.
    fn default_outputs(&self) -> Vec<String> {
        ["mime_type", "mime_encoding", "file_type"]
            .map(String::from)
            .to_vec()
    }

    /// Configured options, as stored in the base plugin.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        self.base.options()
    }

    /// Mutable access to the configured options.
    fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        self.base.options_mut()
    }
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
#[derive(Debug)]
|
|
pub struct FallbackMagicFileMetaPlugin {
|
|
buffer: Vec<u8>,
|
|
max_buffer_size: usize,
|
|
is_finalized: bool,
|
|
base: BaseMetaPlugin,
|
|
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
impl FallbackMagicFileMetaPlugin {
|
|
pub fn new(
|
|
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
) -> FallbackMagicFileMetaPlugin {
|
|
let mut base = BaseMetaPlugin::new();
|
|
|
|
// Set default outputs
|
|
let default_outputs = &["mime_type", "mime_encoding", "file_type"];
|
|
base.initialize_plugin(default_outputs, &options, &outputs);
|
|
|
|
// Get max_buffer_size from options, default to PIPESIZE
|
|
let max_buffer_size = base
|
|
.options
|
|
.get("max_buffer_size")
|
|
.and_then(|v| v.as_u64())
|
|
.unwrap_or(crate::common::PIPESIZE as u64) as usize;
|
|
|
|
FallbackMagicFileMetaPlugin {
|
|
buffer: Vec::new(),
|
|
max_buffer_size,
|
|
is_finalized: false,
|
|
base,
|
|
}
|
|
}
|
|
|
|
fn run_file_command(&self, buffer: &[u8]) -> io::Result<String> {
|
|
let mut temp_file = tempfile::NamedTempFile::new()?;
|
|
temp_file.as_ref().write_all(buffer)?;
|
|
|
|
let output = Command::new("file")
|
|
.arg("-b")
|
|
.arg("-m")
|
|
.arg("all")
|
|
.arg(temp_file.path())
|
|
.output()
|
|
.map_err(|e| {
|
|
io::Error::new(
|
|
io::ErrorKind::Other,
|
|
format!("Failed to run file command: {}", e),
|
|
)
|
|
})?;
|
|
|
|
if !output.status.success() {
|
|
return Err(io::Error::new(io::ErrorKind::Other, "File command failed"));
|
|
}
|
|
|
|
let result = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
Ok(result)
|
|
}
|
|
|
|
fn process_file_output(&self, result: &str) -> Vec<MetaData> {
|
|
let mut metadata = Vec::new();
|
|
|
|
// Parse the file command output
|
|
// file -m all output format is typically: type; charset=encoding
|
|
let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect();
|
|
let file_type = parts.first().cloned().unwrap_or(result);
|
|
let mime_encoding = parts
|
|
.get(1)
|
|
.and_then(|s| s.strip_prefix("charset="))
|
|
.cloned()
|
|
.unwrap_or("");
|
|
|
|
// For mime_type, try to infer from file type or use a heuristic
|
|
let mime_type = if file_type.starts_with("text") {
|
|
"text/plain"
|
|
} else if file_type.contains("ASCII") || file_type.contains("UTF-8") {
|
|
"text/plain"
|
|
} else if file_type.contains("empty") {
|
|
"application/octet-stream"
|
|
} else {
|
|
"application/octet-stream" // default
|
|
};
|
|
|
|
let outputs_to_process = [
|
|
("mime_type", mime_type),
|
|
("mime_encoding", mime_encoding),
|
|
("file_type", file_type),
|
|
];
|
|
|
|
for (name, value) in outputs_to_process.iter() {
|
|
if let Some(meta_data) = process_metadata_outputs(
|
|
name,
|
|
serde_yaml::Value::String(value.to_string()),
|
|
self.base.outputs(),
|
|
) {
|
|
metadata.push(meta_data);
|
|
}
|
|
}
|
|
|
|
metadata
|
|
}
|
|
}
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
impl MetaPlugin for FallbackMagicFileMetaPlugin {
|
|
fn is_finalized(&self) -> bool {
|
|
self.is_finalized
|
|
}
|
|
|
|
fn set_finalized(&mut self, finalized: bool) {
|
|
self.is_finalized = finalized;
|
|
}
|
|
|
|
fn initialize(&mut self) -> MetaPluginResponse {
|
|
// No initialization needed for fallback
|
|
MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: false,
|
|
}
|
|
}
|
|
|
|
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
|
|
if self.is_finalized {
|
|
return MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
|
|
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
|
|
if remaining_capacity > 0 {
|
|
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
|
|
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
|
|
|
|
if self.buffer.len() >= self.max_buffer_size {
|
|
if let Ok(result) = self.run_file_command(&self.buffer) {
|
|
let metadata = self.process_file_output(&result);
|
|
self.is_finalized = true;
|
|
return MetaPluginResponse {
|
|
metadata,
|
|
is_finalized: true,
|
|
};
|
|
} else {
|
|
// On error, finalize with empty metadata
|
|
self.is_finalized = true;
|
|
return MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
}
|
|
}
|
|
|
|
MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: false,
|
|
}
|
|
}
|
|
|
|
fn finalize(&mut self) -> MetaPluginResponse {
|
|
if self.is_finalized {
|
|
return MetaPluginResponse {
|
|
metadata: Vec::new(),
|
|
is_finalized: true,
|
|
};
|
|
}
|
|
|
|
let metadata = if !self.buffer.is_empty() {
|
|
if let Ok(result) = self.run_file_command(&self.buffer) {
|
|
self.process_file_output(&result)
|
|
} else {
|
|
Vec::new()
|
|
}
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
|
|
self.is_finalized = true;
|
|
|
|
MetaPluginResponse {
|
|
metadata,
|
|
is_finalized: true,
|
|
}
|
|
}
|
|
|
|
fn meta_type(&self) -> MetaPluginType {
|
|
MetaPluginType::MagicFile
|
|
}
|
|
|
|
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.outputs()
|
|
}
|
|
|
|
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.outputs_mut()
|
|
}
|
|
|
|
fn default_outputs(&self) -> Vec<String> {
|
|
vec![
|
|
"mime_type".to_string(),
|
|
"mime_encoding".to_string(),
|
|
"file_type".to_string(),
|
|
]
|
|
}
|
|
|
|
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.options()
|
|
}
|
|
|
|
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
|
self.base.options_mut()
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "magic")]
|
|
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
|
|
|
|
#[cfg(not(feature = "magic"))]
|
|
pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin;
|
|
|
|
use crate::meta_plugin::register_meta_plugin;
|
|
|
|
#[ctor::ctor]
|
|
fn register_magic_file_plugin() {
|
|
register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
|
|
Box::new(MagicFileMetaPlugin::new(options, outputs))
|
|
});
|
|
}
|