fix: complete magic file plugin implementation and error handling
Co-authored-by: aider (openai/andrew/openrouter/sonoma-sky-alpha) <aider@aider.chat>
This commit is contained in:
@@ -1,15 +1,16 @@
|
||||
#[cfg(feature = "magic")]
|
||||
use magic::{Cookie, CookieFlags};
|
||||
#[cfg(not(feature = "magic"))]
|
||||
use std::process::{Command, Stdio};
|
||||
use std::io::{self, Write};
|
||||
use std::process::{Command, Stdio, Output};
|
||||
#[cfg(not(feature = "magic"))]
|
||||
use which::which;
|
||||
use std::io::{self, Write};
|
||||
use log::debug;
|
||||
use serde_yaml;
|
||||
|
||||
use crate::common::PIPESIZE;
|
||||
|
||||
use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData};
|
||||
use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs};
|
||||
|
||||
#[cfg(feature = "magic")]
|
||||
#[derive(Debug)]
|
||||
@@ -70,28 +71,416 @@ impl MagicFileMetaPlugin {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
|
||||
// Use the existing cookie and just change flags
|
||||
if let Some(cookie) = &self.cookie {
|
||||
cookie.set_flags(flags)
|
||||
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {}", e)))?;
|
||||
if self.buffer.is_empty() {
|
||||
return Ok("empty".to_string());
|
||||
}
|
||||
|
||||
let cookie = if let Some(c) = &self.cookie {
|
||||
c.set_flags(flags)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?;
|
||||
c
|
||||
} else {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized"));
|
||||
};
|
||||
|
||||
let result = cookie.buffer(&self.buffer)
|
||||
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {}", e)))?;
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
|
||||
|
||||
// Clean up the result - remove extra whitespace and take first part if needed
|
||||
let trimmed = result.trim();
|
||||
|
||||
// For some magic results, we might want just the first part before semicolon or comma
|
||||
let cleaned = if trimmed.contains(';') {
|
||||
trimmed.split(';').next().unwrap_or(trimmed).trim()
|
||||
trimmed.split(';').next().unwrap_or(trimmed).trim().to_string()
|
||||
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
|
||||
trimmed.split(',').next().unwrap_or(trimmed).trim()
|
||||
trimmed.split(',').next().unwrap_or(trimmed).trim().to_string()
|
||||
} else {
|
||||
trimmed
|
||||
trimmed.to_string()
|
||||
};
|
||||
|
||||
Ok(cleaned.to_string())
|
||||
Ok(cleaned)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "magic")]
|
||||
impl MetaPlugin for MagicFileMetaPlugin {
|
||||
fn meta_type(&self) -> MetaPluginType {
|
||||
MetaPluginType::MagicFile
|
||||
}
|
||||
|
||||
fn is_supported(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn is_internal(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn is_finalized(&self) -> bool {
|
||||
self.is_finalized
|
||||
}
|
||||
|
||||
fn initialize(&mut self) -> MetaPluginResponse {
|
||||
self.is_finalized = false;
|
||||
MetaPluginResponse::default()
|
||||
}
|
||||
|
||||
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
|
||||
if self.buffer.len() + data.len() > self.max_buffer_size {
|
||||
// Truncate to max size, keeping the beginning
|
||||
let additional_space = self.max_buffer_size.saturating_sub(self.buffer.len());
|
||||
if additional_space > 0 {
|
||||
self.buffer.extend_from_slice(&data[..additional_space.min(data.len())]);
|
||||
}
|
||||
} else {
|
||||
Err(io::Error::other("Magic cookie not
|
||||
self.buffer.extend_from_slice(data);
|
||||
}
|
||||
MetaPluginResponse::default()
|
||||
}
|
||||
|
||||
fn finalize(&mut self) -> MetaPluginResponse {
|
||||
let mut metadata = Vec::new();
|
||||
let mut response = MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized: true,
|
||||
};
|
||||
|
||||
if self.buffer.is_empty() {
|
||||
self.is_finalized = true;
|
||||
return response;
|
||||
}
|
||||
|
||||
// Initialize cookie if not already done
|
||||
if self.cookie.is_none() {
|
||||
match Cookie::open(CookieFlags::default()) {
|
||||
Ok(cookie) => {
|
||||
self.cookie = Some(cookie);
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("META: Failed to initialize magic cookie: {}", e);
|
||||
self.is_finalized = true;
|
||||
return response;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process mime_type
|
||||
if let Some(_) = self.base.outputs.get("mime_type") {
|
||||
match self.get_magic_result(CookieFlags::MIME_TYPE) {
|
||||
Ok(mime_type) => {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"mime_type",
|
||||
serde_yaml::Value::String(mime_type),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("META: Failed to get MIME type: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
// Process mime_encoding
|
||||
if let Some(_) = self.base.outputs.get("mime_encoding") {
|
||||
match self.get_magic_result(CookieFlags::MIME_ENCODING) {
|
||||
Ok(mime_encoding) => {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"mime_encoding",
|
||||
serde_yaml::Value::String(mime_encoding),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("META: Failed to get MIME encoding: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
// Process file_type (description)
|
||||
if let Some(_) = self.base.outputs.get("file_type") {
|
||||
match self.get_magic_result(CookieFlags::empty()) {
|
||||
Ok(file_type) => {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"file_type",
|
||||
serde_yaml::Value::String(file_type),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("META: Failed to get file type: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
self.is_finalized = true;
|
||||
response
|
||||
}
|
||||
|
||||
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&self.base.outputs
|
||||
}
|
||||
|
||||
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&mut self.base.outputs
|
||||
}
|
||||
|
||||
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&self.base.options
|
||||
}
|
||||
|
||||
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&mut self.base.options
|
||||
}
|
||||
|
||||
fn default_outputs(&self) -> Vec<String> {
|
||||
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "magic"))]
|
||||
#[derive(Debug)]
|
||||
pub struct FallbackMagicFileMetaPlugin {
|
||||
buffer: Vec<u8>,
|
||||
max_buffer_size: usize,
|
||||
supported: bool,
|
||||
is_finalized: bool,
|
||||
base: BaseMetaPlugin,
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "magic"))]
|
||||
impl FallbackMagicFileMetaPlugin {
|
||||
pub fn new(
|
||||
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
||||
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
||||
) -> FallbackMagicFileMetaPlugin {
|
||||
let supported = which("file").is_ok();
|
||||
|
||||
// Start with default options
|
||||
let mut final_options = std::collections::HashMap::new();
|
||||
final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into()));
|
||||
if let Some(opts) = options {
|
||||
for (key, value) in opts {
|
||||
final_options.insert(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Start with default outputs
|
||||
let mut final_outputs = std::collections::HashMap::new();
|
||||
let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()];
|
||||
for output_name in default_outputs {
|
||||
final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name));
|
||||
}
|
||||
if let Some(outs) = outputs {
|
||||
for (key, value) in outs {
|
||||
final_outputs.insert(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
let max_buffer_size = final_options.get("max_buffer_size")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(PIPESIZE as u64) as usize;
|
||||
|
||||
let mut base = BaseMetaPlugin::new();
|
||||
base.outputs = final_outputs;
|
||||
base.options = final_options;
|
||||
|
||||
FallbackMagicFileMetaPlugin {
|
||||
buffer: Vec::new(),
|
||||
max_buffer_size,
|
||||
supported,
|
||||
is_finalized: false,
|
||||
base,
|
||||
}
|
||||
}
|
||||
|
||||
fn run_file_command(&self, args: &[&str]) -> io::Result<String> {
|
||||
if self.buffer.is_empty() {
|
||||
return Ok("empty".to_string());
|
||||
}
|
||||
|
||||
let mut cmd = Command::new("file");
|
||||
for arg in args {
|
||||
cmd.arg(arg);
|
||||
}
|
||||
cmd.arg("-").stdin(Stdio::piped()).stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
|
||||
let mut child = cmd.spawn()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn file command: {}", e)))?;
|
||||
|
||||
{
|
||||
let stdin = child.stdin.as_mut().unwrap();
|
||||
stdin.write_all(&self.buffer)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to write to file stdin: {}", e)))?;
|
||||
}
|
||||
|
||||
let output = child.wait_with_output()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait on file command: {}", e)))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
return Err(io::Error::new(io::ErrorKind::Other, format!("File command failed: {}", stderr)));
|
||||
}
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
Ok(stdout)
|
||||
}
|
||||
|
||||
fn get_mime_info(&self) -> io::Result<(String, String)> {
|
||||
let mime_output = self.run_file_command(&["-b", "--mime"])?;
|
||||
if mime_output == "empty" {
|
||||
return Ok(("application/octet-stream".to_string(), "binary".to_string()));
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = mime_output.split(';').collect();
|
||||
let mime_type = if parts.is_empty() {
|
||||
mime_output
|
||||
} else {
|
||||
parts[0].trim()
|
||||
}.to_string();
|
||||
|
||||
let mime_encoding = if parts.len() > 1 {
|
||||
parts[1].replace(" charset=", "").trim().to_string()
|
||||
} else {
|
||||
"binary".to_string()
|
||||
};
|
||||
|
||||
Ok((mime_type, mime_encoding))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "magic"))]
|
||||
impl MetaPlugin for FallbackMagicFileMetaPlugin {
|
||||
fn meta_type(&self) -> MetaPluginType {
|
||||
MetaPluginType::MagicFile
|
||||
}
|
||||
|
||||
fn is_supported(&self) -> bool {
|
||||
self.supported
|
||||
}
|
||||
|
||||
fn is_internal(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn is_finalized(&self) -> bool {
|
||||
self.is_finalized
|
||||
}
|
||||
|
||||
fn initialize(&mut self) -> MetaPluginResponse {
|
||||
self.is_finalized = false;
|
||||
MetaPluginResponse::default()
|
||||
}
|
||||
|
||||
fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
|
||||
if self.buffer.len() + data.len() > self.max_buffer_size {
|
||||
// Truncate to max size, keeping the beginning
|
||||
let additional_space = self.max_buffer_size.saturating_sub(self.buffer.len());
|
||||
if additional_space > 0 {
|
||||
self.buffer.extend_from_slice(&data[..additional_space.min(data.len())]);
|
||||
}
|
||||
} else {
|
||||
self.buffer.extend_from_slice(data);
|
||||
}
|
||||
MetaPluginResponse::default()
|
||||
}
|
||||
|
||||
fn finalize(&mut self) -> MetaPluginResponse {
|
||||
let mut metadata = Vec::new();
|
||||
let mut response = MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized: true,
|
||||
};
|
||||
|
||||
if !self.supported || self.buffer.is_empty() {
|
||||
self.is_finalized = true;
|
||||
return response;
|
||||
}
|
||||
|
||||
// Process mime_type and mime_encoding from single mime command
|
||||
match self.get_mime_info() {
|
||||
Ok((mime_type, mime_encoding)) => {
|
||||
if let Some(_) = self.base.outputs.get("mime_type") {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"mime_type",
|
||||
serde_yaml::Value::String(mime_type.clone()),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(_) = self.base.outputs.get("mime_encoding") {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"mime_encoding",
|
||||
serde_yaml::Value::String(mime_encoding),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("META: Failed to get MIME info with file command: {}", e),
|
||||
}
|
||||
|
||||
// Process file_type (description)
|
||||
if let Some(_) = self.base.outputs.get("file_type") {
|
||||
match self.run_file_command(&["-b"]) {
|
||||
Ok(file_type) => {
|
||||
if let Some(meta_data) = process_metadata_outputs(
|
||||
"file_type",
|
||||
serde_yaml::Value::String(file_type),
|
||||
&self.base.outputs,
|
||||
) {
|
||||
response.metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
Err(e) => debug!("META: Failed to get file type with file command: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
self.is_finalized = true;
|
||||
response
|
||||
}
|
||||
|
||||
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&self.base.outputs
|
||||
}
|
||||
|
||||
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&mut self.base.outputs
|
||||
}
|
||||
|
||||
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&self.base.options
|
||||
}
|
||||
|
||||
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
&mut self.base.options
|
||||
}
|
||||
|
||||
fn default_outputs(&self) -> Vec<String> {
|
||||
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
|
||||
}
|
||||
}
|
||||
|
||||
// Registration
|
||||
// `MetaPluginType` is already imported at the top of the file; importing it a second
// time here would be a duplicate-name error, so only the registration function is added.
#[cfg(feature = "magic")]
use crate::meta_plugin::register_meta_plugin;

/// Registers the libmagic-backed plugin under `MetaPluginType::MagicFile`.
///
/// Marked with `#[ctor::ctor]` so the registration runs at program start-up.
#[cfg(feature = "magic")]
#[ctor::ctor]
fn register_magic_plugin() {
    register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
        Box::new(MagicFileMetaPlugin::new(options, outputs))
    });
}
|
||||
|
||||
#[cfg(not(feature = "magic"))]
|
||||
use crate::meta_plugin::{register_meta_plugin, MetaPluginType};
|
||||
#[cfg(not(feature = "magic"))]
|
||||
#[ctor::ctor]
|
||||
fn register_fallback_magic_plugin() {
|
||||
register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
|
||||
Box::new(FallbackMagicFileMetaPlugin::new(options, outputs))
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user