Files
keep/src/meta_plugin/magic.rs
Andrew Phillips 1d463323ce refactor: update empty hashmap initialization with lazy initialization
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
2025-08-26 18:06:50 -03:00

221 lines
7.6 KiB
Rust

use magic::{Cookie, CookieFlags};
use std::io;
use crate::common::PIPESIZE;
use crate::meta_plugin::MetaPlugin;
#[derive(Debug)]
pub struct MagicFileMetaPlugin {
buffer: Vec<u8>,
max_buffer_size: usize,
is_saved: bool,
item_id: Option<i64>,
cookie: Option<Cookie>,
base: crate::meta_plugin::BaseMetaPlugin,
}
impl MagicFileMetaPlugin {
pub fn new(
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
) -> MagicFileMetaPlugin {
// Start with default options
let mut final_options = std::collections::HashMap::new();
final_options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(PIPESIZE.into()));
if let Some(opts) = options {
for (key, value) in opts {
final_options.insert(key, value);
}
}
// Start with default outputs
let mut final_outputs = std::collections::HashMap::new();
let default_outputs = vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()];
for output_name in default_outputs {
final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name));
}
if let Some(outs) = outputs {
for (key, value) in outs {
final_outputs.insert(key, value);
}
}
let max_buffer_size = final_options.get("max_buffer_size")
.and_then(|v| v.as_u64())
.unwrap_or(PIPESIZE as u64) as usize;
let mut base = crate::meta_plugin::BaseMetaPlugin::new();
base.outputs = final_outputs;
base.options = final_options;
MagicFileMetaPlugin {
buffer: Vec::new(),
max_buffer_size,
is_saved: false,
item_id: None,
cookie: None,
base,
}
}
pub fn new_simple() -> MagicFileMetaPlugin {
Self::new(None, None)
}
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
// Use the existing cookie and just change flags
if let Some(cookie) = &self.cookie {
cookie.set_flags(flags)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?;
let result = cookie.buffer(&self.buffer)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
// Clean up the result - remove extra whitespace and take first part if needed
let trimmed = result.trim();
// For some magic results, we might want just the first part before semicolon or comma
let cleaned = if trimmed.contains(';') {
trimmed.split(';').next().unwrap_or(trimmed).trim()
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
trimmed.split(',').next().unwrap_or(trimmed).trim()
} else {
trimmed
};
Ok(cleaned.to_string())
} else {
Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized"))
}
}
/// Helper function to process all magic types and collect metadata
fn process_magic_types(&self) -> Vec<crate::meta_plugin::MetaData> {
let mut metadata = Vec::new();
// Define the types to process with their corresponding flags
let types_to_process = [
("mime_type", CookieFlags::MIME_TYPE),
("mime_encoding", CookieFlags::MIME_ENCODING),
("file_type", CookieFlags::default()),
];
for (name, flags) in types_to_process.iter() {
if let Ok(result) = self.get_magic_result(*flags) {
if !result.is_empty() {
// Use process_metadata_outputs to handle output mapping
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
name,
result,
self.base.outputs()
) {
metadata.push(meta_data);
}
}
}
}
metadata
}
}
impl MetaPlugin for MagicFileMetaPlugin {
fn initialize(&mut self) -> crate::meta_plugin::MetaPluginResponse {
// Initialize the magic cookie once
let cookie = match Cookie::open(Default::default()) {
Ok(cookie) => cookie,
Err(_e) => {
return crate::meta_plugin::MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
};
if let Err(_e) = cookie.load(&[] as &[&str]) {
return crate::meta_plugin::MetaPluginResponse {
metadata: Vec::new(),
is_finalized: true,
};
}
self.cookie = Some(cookie);
crate::meta_plugin::MetaPluginResponse {
metadata: Vec::new(),
is_finalized: false,
}
}
fn finalize(&mut self) -> crate::meta_plugin::MetaPluginResponse {
let metadata = self.process_magic_types();
crate::meta_plugin::MetaPluginResponse {
metadata,
is_finalized: true,
}
}
fn update(&mut self, data: &[u8]) -> crate::meta_plugin::MetaPluginResponse {
let mut metadata = Vec::new();
// Only collect up to max_buffer_size
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 {
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
// Check if we've reached our buffer limit and return metadata
if self.buffer.len() >= self.max_buffer_size {
metadata = self.process_magic_types();
}
}
let is_finalized = !metadata.is_empty();
crate::meta_plugin::MetaPluginResponse {
metadata,
is_finalized,
}
}
fn meta_name(&self) -> String {
"magic_file".to_string()
}
fn configure_options(&mut self, options: &std::collections::HashMap<String, serde_yaml::Value>) -> anyhow::Result<()> {
if let Some(max_buffer_size) = options.get("max_buffer_size") {
if let Some(size) = max_buffer_size.as_u64() {
self.max_buffer_size = size as usize;
}
}
Ok(())
}
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs()
}
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.outputs_mut()
}
fn default_outputs(&self) -> Vec<String> {
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
}
fn default_options(&self) -> std::collections::HashMap<String, serde_yaml::Value> {
let mut options = std::collections::HashMap::new();
options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(serde_yaml::Number::from(PIPESIZE as i64)));
options
}
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
self.base.options()
}
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
self.base.options_mut()
}
}