fix: complete magic file plugin implementation and error handling

Co-authored-by: aider (openai/andrew/openrouter/sonoma-sky-alpha) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-09-12 10:27:16 -03:00
parent 84666155c4
commit 9c354d5ef4

View File

@@ -1,15 +1,16 @@
#[cfg(feature = "magic")]
use magic::{Cookie, CookieFlags};
#[cfg(not(feature = "magic"))]
use std::process::{Command, Stdio};
use std::io::{self, Write};
use std::process::{Command, Stdio, Output};
#[cfg(not(feature = "magic"))]
use which::which;
use std::io::{self, Write};
use log::debug;
use serde_yaml;
use crate::common::PIPESIZE;
use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData};
use crate::meta_plugin::{MetaPlugin, MetaPluginType, BaseMetaPlugin, MetaPluginResponse, MetaData, process_metadata_outputs};
#[cfg(feature = "magic")]
#[derive(Debug)]
@@ -70,28 +71,416 @@ impl MagicFileMetaPlugin {
}
}
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
// Use the existing cookie and just change flags
if let Some(cookie) = &self.cookie {
cookie.set_flags(flags)
.map_err(|e| io::Error::other(format!("Failed to set magic flags: {}", e)))?;
if self.buffer.is_empty() {
return Ok("empty".to_string());
}
let cookie = if let Some(c) = &self.cookie {
c.set_flags(flags)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?;
c
} else {
return Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized"));
};
let result = cookie.buffer(&self.buffer)
.map_err(|e| io::Error::other(format!("Failed to analyze buffer: {}", e)))?;
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
// Clean up the result - remove extra whitespace and take first part if needed
let trimmed = result.trim();
// For some magic results, we might want just the first part before semicolon or comma
let cleaned = if trimmed.contains(';') {
trimmed.split(';').next().unwrap_or(trimmed).trim()
trimmed.split(';').next().unwrap_or(trimmed).trim().to_string()
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
trimmed.split(',').next().unwrap_or(trimmed).trim()
trimmed.split(',').next().unwrap_or(trimmed).trim().to_string()
} else {
trimmed
trimmed.to_string()
};
Ok(cleaned.to_string())
Ok(cleaned)
}
}
#[cfg(feature = "magic")]
impl MetaPlugin for MagicFileMetaPlugin {
    /// Identifies this plugin as the magic-file plugin.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Always `true` for the libmagic-backed implementation.
    fn is_supported(&self) -> bool {
        true
    }

    /// Always `true`.
    fn is_internal(&self) -> bool {
        true
    }

    /// Reports whether `finalize` has completed since the last `initialize`.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Clears the finalized flag and returns a default response.
    fn initialize(&mut self) -> MetaPluginResponse {
        self.is_finalized = false;
        MetaPluginResponse::default()
    }

    /// Appends `data` to the internal buffer, keeping at most `max_buffer_size` bytes.
    ///
    /// Only the beginning of the stream is retained; bytes past the cap are dropped.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        self.buffer.extend_from_slice(&data[..room.min(data.len())]);
        MetaPluginResponse::default()
    }

    /// Analyses the buffered bytes and emits one metadata entry per configured output.
    ///
    /// Outputs are handled in the order `mime_type`, `mime_encoding`, `file_type`.
    /// A failure for one output is logged at debug level and does not prevent the
    /// remaining outputs from being attempted. An empty buffer, or a cookie that
    /// cannot be opened, yields a finalized response without metadata.
    fn finalize(&mut self) -> MetaPluginResponse {
        let mut response = MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: true,
        };
        // Every exit path below leaves the plugin finalized.
        self.is_finalized = true;

        if self.buffer.is_empty() {
            return response;
        }

        // Initialize cookie if not already done.
        // NOTE(review): no magic database is loaded here after `Cookie::open`; confirm
        // that one is loaded elsewhere, otherwise `buffer` analysis may fail.
        if self.cookie.is_none() {
            match Cookie::open(CookieFlags::default()) {
                Ok(cookie) => self.cookie = Some(cookie),
                Err(e) => {
                    debug!("META: Failed to initialize magic cookie: {}", e);
                    return response;
                }
            }
        }

        // (output key, libmagic flags, label used in the failure log message)
        let queries = [
            ("mime_type", CookieFlags::MIME_TYPE, "MIME type"),
            ("mime_encoding", CookieFlags::MIME_ENCODING, "MIME encoding"),
            ("file_type", CookieFlags::empty(), "file type"),
        ];

        for &(name, flags, label) in &queries {
            if !self.base.outputs.contains_key(name) {
                continue;
            }
            match self.get_magic_result(flags) {
                Ok(value) => {
                    if let Some(meta_data) = process_metadata_outputs(
                        name,
                        serde_yaml::Value::String(value),
                        &self.base.outputs,
                    ) {
                        response.metadata.push(meta_data);
                    }
                }
                Err(e) => debug!("META: Failed to get {}: {}", label, e),
            }
        }

        response
    }

    /// Read-only view of the configured outputs.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        &self.base.outputs
    }

    /// Mutable view of the configured outputs.
    fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        &mut self.base.outputs
    }

    /// Read-only view of the plugin options.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        &self.base.options
    }

    /// Mutable view of the plugin options.
    fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        &mut self.base.options
    }

    /// Names of the outputs this plugin produces by default.
    fn default_outputs(&self) -> Vec<String> {
        vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
    }
}
/// Magic-file metadata plugin used when the `magic` feature is disabled.
///
/// Instead of linking libmagic it buffers the start of the stream and hands it to
/// the external `file` command.
#[cfg(not(feature = "magic"))]
#[derive(Debug)]
pub struct FallbackMagicFileMetaPlugin {
/// Bytes collected by `update`, never longer than `max_buffer_size`.
buffer: Vec<u8>,
/// Upper bound on `buffer`, read from the `max_buffer_size` option (default `PIPESIZE`).
max_buffer_size: usize,
/// Whether a `file` executable was found when the plugin was constructed.
supported: bool,
/// Set by `finalize`, cleared again by `initialize`.
is_finalized: bool,
/// Holds the plugin's `options` and `outputs` maps.
base: BaseMetaPlugin,
}
#[cfg(not(feature = "magic"))]
impl FallbackMagicFileMetaPlugin {
    /// Builds the plugin, layering caller-supplied `options` and `outputs` over the defaults.
    ///
    /// Defaults: `max_buffer_size` is `PIPESIZE`, and the outputs `mime_type`,
    /// `mime_encoding` and `file_type` each map to their own name. Whether the
    /// plugin is supported is decided once here, by looking up `file` with `which`.
    pub fn new(
        options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
        outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
    ) -> FallbackMagicFileMetaPlugin {
        let supported = which("file").is_ok();

        // Start with default options, then let the caller override them.
        let mut final_options = std::collections::HashMap::new();
        final_options.insert(
            "max_buffer_size".to_string(),
            serde_yaml::Value::Number(PIPESIZE.into()),
        );
        if let Some(opts) = options {
            final_options.extend(opts);
        }

        // Start with default outputs, then let the caller override them.
        let mut final_outputs = std::collections::HashMap::new();
        for output_name in ["mime_type", "mime_encoding", "file_type"] {
            final_outputs.insert(
                output_name.to_string(),
                serde_yaml::Value::String(output_name.to_string()),
            );
        }
        if let Some(outs) = outputs {
            final_outputs.extend(outs);
        }

        // A non-numeric `max_buffer_size` falls back to the default.
        let max_buffer_size = final_options
            .get("max_buffer_size")
            .and_then(|v| v.as_u64())
            .unwrap_or(PIPESIZE as u64) as usize;

        let mut base = BaseMetaPlugin::new();
        base.outputs = final_outputs;
        base.options = final_options;

        FallbackMagicFileMetaPlugin {
            buffer: Vec::new(),
            max_buffer_size,
            supported,
            is_finalized: false,
            base,
        }
    }

    /// Pipes the buffered bytes into `file <args> -` and returns its trimmed stdout.
    ///
    /// Returns the literal string `"empty"` without spawning anything when the
    /// buffer is empty.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` when the command cannot be spawned,
    /// its stdin cannot be written, waiting for it fails, or it exits unsuccessfully
    /// (the error message then carries the command's stderr).
    fn run_file_command(&self, args: &[&str]) -> io::Result<String> {
        if self.buffer.is_empty() {
            return Ok("empty".to_string());
        }

        let mut child = Command::new("file")
            .args(args)
            .arg("-")
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn file command: {}", e)))?;

        // Take ownership of stdin so that dropping it closes the pipe and the child sees EOF.
        let mut stdin = child
            .stdin
            .take()
            .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Failed to open file stdin"))?;
        let write_result = stdin.write_all(&self.buffer);
        drop(stdin);

        if let Err(e) = write_result {
            // The child may close its stdin before consuming everything; whatever it
            // already read is still analysed, so a broken pipe is not a failure.
            if e.kind() != io::ErrorKind::BrokenPipe {
                // Reap the child so it does not linger after the early return.
                let _ = child.wait();
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Failed to write to file stdin: {}", e),
                ));
            }
        }

        let output = child
            .wait_with_output()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait on file command: {}", e)))?;

        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!("File command failed: {}", stderr),
            ));
        }

        Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
    }

    /// Returns `(mime_type, mime_encoding)` obtained from a single `file -b --mime` run.
    ///
    /// An empty buffer yields `("application/octet-stream", "binary")`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `run_file_command`.
    fn get_mime_info(&self) -> io::Result<(String, String)> {
        let mime_output = self.run_file_command(&["-b", "--mime"])?;
        if mime_output == "empty" {
            return Ok(("application/octet-stream".to_string(), "binary".to_string()));
        }
        Ok(Self::parse_mime_output(&mime_output))
    }

    /// Splits text shaped like `type; charset=encoding` into `(type, encoding)`.
    ///
    /// The encoding defaults to `"binary"` when there is no `;`-separated second field.
    fn parse_mime_output(raw: &str) -> (String, String) {
        let mut fields = raw.split(';');
        // `split` always yields at least one item, so the fallback is never taken.
        let mime_type = fields.next().unwrap_or(raw).trim().to_string();
        let mime_encoding = match fields.next() {
            Some(field) => {
                let field = field.trim();
                field.strip_prefix("charset=").unwrap_or(field).trim().to_string()
            }
            None => "binary".to_string(),
        };
        (mime_type, mime_encoding)
    }
}
#[cfg(not(feature = "magic"))]
impl MetaPlugin for FallbackMagicFileMetaPlugin {
    /// This plugin reports itself as the magic-file plugin type.
    fn meta_type(&self) -> MetaPluginType {
        MetaPluginType::MagicFile
    }

    /// Mirrors the `supported` flag computed at construction time.
    fn is_supported(&self) -> bool {
        self.supported
    }

    /// Always `true`.
    fn is_internal(&self) -> bool {
        true
    }

    /// `true` once `finalize` has run and `initialize` has not been called since.
    fn is_finalized(&self) -> bool {
        self.is_finalized
    }

    /// Marks the plugin as not finalized and hands back a default response.
    fn initialize(&mut self) -> MetaPluginResponse {
        self.is_finalized = false;
        MetaPluginResponse::default()
    }

    /// Buffers incoming bytes until `max_buffer_size` is reached; the excess is discarded.
    fn update(&mut self, data: &[u8]) -> MetaPluginResponse {
        // Room left before the cap; zero once the buffer is full.
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        let accepted = &data[..room.min(data.len())];
        self.buffer.extend_from_slice(accepted);
        MetaPluginResponse::default()
    }

    /// Runs the `file` command over the buffered bytes and collects the configured outputs.
    ///
    /// MIME type and encoding come from one command run; the description comes from a
    /// second run that only happens when the `file_type` output is configured. Command
    /// failures are logged at debug level and simply leave the affected entries out.
    fn finalize(&mut self) -> MetaPluginResponse {
        let mut response = MetaPluginResponse {
            metadata: Vec::new(),
            is_finalized: true,
        };
        // Nothing below reads this flag, so it can be set once for every exit path.
        self.is_finalized = true;

        if self.buffer.is_empty() || !self.supported {
            return response;
        }

        let configured = &self.base.outputs;
        let mut emit = |key: &'static str, text: String| {
            if let Some(entry) =
                process_metadata_outputs(key, serde_yaml::Value::String(text), configured)
            {
                response.metadata.push(entry);
            }
        };

        // MIME type and encoding share a single command invocation.
        match self.get_mime_info() {
            Err(e) => debug!("META: Failed to get MIME info with file command: {}", e),
            Ok((kind, charset)) => {
                if configured.contains_key("mime_type") {
                    emit("mime_type", kind);
                }
                if configured.contains_key("mime_encoding") {
                    emit("mime_encoding", charset);
                }
            }
        }

        // Human-readable description; skipped entirely when not requested.
        if configured.contains_key("file_type") {
            match self.run_file_command(&["-b"]) {
                Err(e) => debug!("META: Failed to get file type with file command: {}", e),
                Ok(description) => emit("file_type", description),
            }
        }

        response
    }

    /// Borrow the output configuration.
    fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        &self.base.outputs
    }

    /// Mutably borrow the output configuration.
    fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        &mut self.base.outputs
    }

    /// Borrow the option map.
    fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
        &self.base.options
    }

    /// Mutably borrow the option map.
    fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
        &mut self.base.options
    }

    /// The three output names produced when nothing else is configured.
    fn default_outputs(&self) -> Vec<String> {
        ["mime_type", "mime_encoding", "file_type"]
            .iter()
            .map(|name| name.to_string())
            .collect()
    }
}
// Registration
// `MetaPluginType` is already imported at the top of the file; importing it a second
// time here would be a duplicate-name error, so only the registrar is brought in.
#[cfg(feature = "magic")]
use crate::meta_plugin::register_meta_plugin;

/// Registers the libmagic-backed plugin as the factory for `MetaPluginType::MagicFile`.
///
/// Invoked through `#[ctor::ctor]` rather than called explicitly.
#[cfg(feature = "magic")]
#[ctor::ctor]
fn register_magic_plugin() {
    register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
        Box::new(MagicFileMetaPlugin::new(options, outputs))
    });
}
#[cfg(not(feature = "magic"))]
use crate::meta_plugin::{register_meta_plugin, MetaPluginType};
#[cfg(not(feature = "magic"))]
#[ctor::ctor]
fn register_fallback_magic_plugin() {
register_meta_plugin(MetaPluginType::MagicFile, |options, outputs| {
Box::new(FallbackMagicFileMetaPlugin::new(options, outputs))
});
}