Files
keep/src/meta_plugin/magic_file.rs

422 lines
13 KiB
Rust

#[cfg(feature = "magic")]
use magic::{Cookie, CookieFlags};
#[cfg(not(feature = "magic"))]
use std::process::{Command, Stdio};
use std::io::{self, Write};
use std::path::Path;
use log::debug;
use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs};
#[cfg(feature = "magic")]
#[derive(Debug)]
pub struct MagicFileMetaPluginImpl {
buffer: Vec<u8>,
max_buffer_size: usize,
is_finalized: bool,
cookie: Option<Cookie>,
base: BaseMetaPlugin,
}
#[cfg(feature = "magic")]
impl MagicFileMetaPluginImpl {
pub fn new(
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
) -> MagicFileMetaPluginImpl {
let mut base = BaseMetaPlugin::new();
// Set default outputs
let default_outputs = &["mime_type", "mime_encoding", "file_type"];
base.initialize_plugin(default_outputs, &options, &outputs);
// Get max_buffer_size from options, default to PIPESIZE
let max_buffer_size = base.options
.get("max_buffer_size")
.and_then(|v| v.as_u64())
.unwrap_or(crate::common::PIPESIZE as u64) as usize;
MagicFileMetaPluginImpl {
buffer: Vec::new(),
max_buffer_size,
is_finalized: false,
cookie: None,
base,
}
}
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
if let Some(cookie) = &self.cookie {
cookie.set_flags(flags)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?;
let result = cookie.buffer(&self.buffer)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
// Clean up the result - remove extra whitespace
let trimmed = result.trim().to_string();
Ok(trimmed)
} else {
Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized"))
}
}
fn process_magic_types(&self) -> Vec<MetaData> {
let mut metadata = Vec::new();
let types_to_process = [
("mime_type", CookieFlags::MIME_TYPE),
("mime_encoding", CookieFlags::MIME_ENCODING),
("file_type", CookieFlags::empty()),
];
for (name, flags) in types_to_process.iter() {
if let Ok(result) = self.get_magic_result(*flags) {
if !result.is_empty() {
if let Some(meta_data) = process_metadata_outputs(
name,
serde_yaml::Value::String(result),
self.base.outputs(),
) {
metadata.push(meta_data);
}
}
}
}
metadata
}
}
#[cfg(feature = "magic")]
impl MetaPlugin for MagicFileMetaPluginImpl {
fn is_finalized(&self) -> bool {
self.is_finalized
}
fn set_finalized(&mut self, finalized: bool) {
self.is_finalized = finalized;
}
fn initialize(&mut self) -> MetaPluginResponse {
let cookie = match Cookie::open(CookieFlags::default()) {
Ok(cookie) => cookie,
Err(e) => {
debug!("META: MagicFile plugin: failed to create cookie: {}", e);
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
};
if let Err(e) = cookie.load(&[] as &[&Path]) {
debug!("META: MagicFile plugin: failed to load magic database: {}", e);
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
self.cookie = Some(cookie);
MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,
}
}
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
if self.is_finalized {
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 {
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
if self.buffer.len() >= self.max_buffer_size {
let metadata = self.process_magic_types();
self.is_finalized = true;
return MetaPluginResponse {
metadata,
is_finalized: true,
};
}
}
MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,
}
}
fn finalize(&mut self) -> MetaPluginResponse {
if self.is_finalized {
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
let metadata = self.process_magic_types();
self.is_finalized = true;
MetaPluginResponse {
metadata,
is_finalized: true,
}
}
fn meta_type(&self) -> MetaPluginType {
MetaPluginType::MagicFile
}
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs()
}
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs_mut()
}
fn default_outputs(&self) -> Vec<String> {
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
}
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options()
}
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.options_mut()
}
}
#[cfg(not(feature = "magic"))]
#[derive(Debug)]
pub struct FallbackMagicFileMetaPlugin {
buffer: Vec<u8>,
max_buffer_size: usize,
is_finalized: bool,
base: BaseMetaPlugin,
}
#[cfg(not(feature = "magic"))]
impl FallbackMagicFileMetaPlugin {
pub fn new(
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
) -> FallbackMagicFileMetaPlugin {
let mut base = BaseMetaPlugin::new();
// Set default outputs
let default_outputs = &["mime_type", "mime_encoding", "file_type"];
base.initialize_plugin(default_outputs, &options, &outputs);
// Get max_buffer_size from options, default to PIPESIZE
let max_buffer_size = base.options
.get("max_buffer_size")
.and_then(|v| v.as_u64())
.unwrap_or(crate::common::PIPESIZE as u64) as usize;
FallbackMagicFileMetaPlugin {
buffer: Vec::new(),
max_buffer_size,
is_finalized: false,
base,
}
}
fn run_file_command(&self, buffer: &[u8]) -> io::Result<String> {
let mut temp_file = tempfile::NamedTempFile::new()?;
temp_file.as_ref().write_all(buffer)?;
let output = Command::new("file")
.arg("-b")
.arg("-m")
.arg("all")
.arg(temp_file.path())
.output()
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to run file command: {}", e)))?;
if !output.status.success() {
return Err(io::Error::new(io::ErrorKind::Other, "File command failed"));
}
let result = String::from_utf8_lossy(&output.stdout).trim().to_string();
Ok(result)
}
fn process_file_output(&self, result: &str) -> Vec<MetaData> {
let mut metadata = Vec::new();
// Parse the file command output
// file -m all output format is typically: type; charset=encoding
let parts: Vec<&str> = result.split(';').map(|s| s.trim()).collect();
let file_type = parts.first().cloned().unwrap_or(result);
let mime_encoding = parts.get(1)
.and_then(|s| s.strip_prefix("charset="))
.cloned()
.unwrap_or("");
// For mime_type, try to infer from file type or use a heuristic
let mime_type = if file_type.starts_with("text") {
"text/plain"
} else if file_type.contains("ASCII") || file_type.contains("UTF-8") {
"text/plain"
} else if file_type.contains("empty") {
"application/octet-stream"
} else {
"application/octet-stream" // default
};
let outputs_to_process = [
("mime_type", mime_type),
("mime_encoding", mime_encoding),
("file_type", file_type),
];
for (name, value) in outputs_to_process.iter() {
if let Some(meta_data) = process_metadata_outputs(
name,
serde_yaml::Value::String(value.to_string()),
self.base.outputs(),
) {
metadata.push(meta_data);
}
}
metadata
}
}
#[cfg(not(feature = "magic"))]
impl MetaPlugin for FallbackMagicFileMetaPlugin {
fn is_finalized(&self) -> bool {
self.is_finalized
}
fn set_finalized(&mut self, finalized: bool) {
self.is_finalized = finalized;
}
fn initialize(&mut self) -> MetaPluginResponse {
// No initialization needed for fallback
MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,
}
}
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
if self.is_finalized {
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 {
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
if self.buffer.len() >= self.max_buffer_size {
if let Ok(result) = self.run_file_command(&self.buffer) {
let metadata = self.process_file_output(&result);
self.is_finalized = true;
return MetaPluginResponse {
metadata,
is_finalized: true,
};
} else {
// On error, finalize with empty metadata
self.is_finalized = true;
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
}
}
MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,
}
}
fn finalize(&mut self) -> MetaPluginResponse {
if self.is_finalized {
return MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
let metadata = if !self.buffer.is_empty() {
if let Ok(result) = self.run_file_command(&self.buffer) {
self.process_file_output(&result)
} else {
Vec::new()
}
} else {
Vec::new()
};
self.is_finalized = true;
MetaPluginResponse {
metadata,
is_finalized: true,
}
}
fn meta_type(&self) -> MetaPluginType {
MetaPluginType::MagicFile
}
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs()
}
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs_mut()
}
fn default_outputs(&self) -> Vec<String> {
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
}
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options()
}
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.options_mut()
}
}
#[cfg(feature = "magic")]
pub use MagicFileMetaPluginImpl as MagicFileMetaPlugin;
#[cfg(not(feature = "magic"))]
pub use FallbackMagicFileMetaPlugin as MagicFileMetaPlugin;
use crate::meta_plugin::register_meta_plugin;
#[ctor::ctor]
fn register_magic_file_plugin() {
register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
Box::new(MagicFileMetaPlugin::new(options, outputs))
});
}