Co-authored-by: aider (openai/andrew/openrouter/anthropic/claude-sonnet-4) <aider@aider.chat>
191 lines
6.7 KiB
Rust
191 lines
6.7 KiB
Rust
use anyhow::Result;
|
|
use magic::{Cookie, CookieFlags};
|
|
use rusqlite::Connection;
|
|
use std::io;
|
|
|
|
use crate::meta_plugin::MetaPlugin;
|
|
|
|
#[derive(Debug)]
|
|
pub struct MagicFileMetaPlugin {
|
|
buffer: Vec<u8>,
|
|
max_buffer_size: usize,
|
|
is_saved: bool,
|
|
item_id: Option<i64>,
|
|
cookie: Option<Cookie>,
|
|
outputs: std::collections::HashMap<String, serde_yaml::Value>,
|
|
}
|
|
|
|
impl MagicFileMetaPlugin {
|
|
pub fn new(
|
|
options: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
|
) -> MagicFileMetaPlugin {
|
|
// Start with default options
|
|
let mut final_options = Self::new_simple().default_options();
|
|
if let Some(opts) = options {
|
|
for (key, value) in opts {
|
|
final_options.insert(key, value);
|
|
}
|
|
}
|
|
|
|
// Start with default outputs
|
|
let mut final_outputs = std::collections::HashMap::new();
|
|
let default_outputs = Self::new_simple().default_outputs();
|
|
for output_name in default_outputs {
|
|
final_outputs.insert(output_name.clone(), serde_yaml::Value::String(output_name));
|
|
}
|
|
if let Some(outs) = outputs {
|
|
for (key, value) in outs {
|
|
final_outputs.insert(key, value);
|
|
}
|
|
}
|
|
|
|
let max_buffer_size = final_options.get("max_buffer_size")
|
|
.and_then(|v| v.as_u64())
|
|
.unwrap_or(4096) as usize;
|
|
|
|
MagicFileMetaPlugin {
|
|
buffer: Vec::new(),
|
|
max_buffer_size,
|
|
is_saved: false,
|
|
item_id: None,
|
|
cookie: None,
|
|
outputs: final_outputs,
|
|
}
|
|
}
|
|
|
|
pub fn new_simple() -> MagicFileMetaPlugin {
|
|
Self::new(None, None)
|
|
}
|
|
|
|
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
|
|
// Use the existing cookie and just change flags
|
|
if let Some(cookie) = &self.cookie {
|
|
cookie.set_flags(flags)
|
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to set magic flags: {}", e)))?;
|
|
|
|
let result = cookie.buffer(&self.buffer)
|
|
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
|
|
|
|
// Clean up the result - remove extra whitespace and take first part if needed
|
|
let trimmed = result.trim();
|
|
|
|
// For some magic results, we might want just the first part before semicolon or comma
|
|
let cleaned = if trimmed.contains(';') {
|
|
trimmed.split(';').next().unwrap_or(trimmed).trim()
|
|
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
|
|
trimmed.split(',').next().unwrap_or(trimmed).trim()
|
|
} else {
|
|
trimmed
|
|
};
|
|
|
|
Ok(cleaned.to_string())
|
|
} else {
|
|
Err(io::Error::new(io::ErrorKind::Other, "Magic cookie not initialized"))
|
|
}
|
|
}
|
|
|
|
fn save_all_magic_metadata(&mut self, conn: &Connection) -> Result<()> {
|
|
if let Some(item_id) = self.item_id {
|
|
// Save all three magic outputs: mime_type, mime_encoding, and file_type
|
|
if let Ok(mime_type) = self.get_magic_result(CookieFlags::MIME_TYPE) {
|
|
if !mime_type.is_empty() {
|
|
let _ = self.save_meta(conn, item_id, "mime_type", mime_type);
|
|
}
|
|
}
|
|
|
|
if let Ok(mime_encoding) = self.get_magic_result(CookieFlags::MIME_ENCODING) {
|
|
if !mime_encoding.is_empty() {
|
|
let _ = self.save_meta(conn, item_id, "mime_encoding", mime_encoding);
|
|
}
|
|
}
|
|
|
|
if let Ok(file_type) = self.get_magic_result(CookieFlags::default()) {
|
|
if !file_type.is_empty() {
|
|
let _ = self.save_meta(conn, item_id, "file_type", file_type);
|
|
}
|
|
}
|
|
|
|
self.is_saved = true;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl MetaPlugin for MagicFileMetaPlugin {
|
|
fn is_internal(&self) -> bool {
|
|
true
|
|
}
|
|
|
|
fn initialize(&mut self, _conn: &Connection, item_id: i64) -> Result<()> {
|
|
self.item_id = Some(item_id);
|
|
|
|
// Initialize the magic cookie once
|
|
let cookie = Cookie::open(Default::default())
|
|
.map_err(|e| anyhow::anyhow!("Failed to open magic cookie: {}", e))?;
|
|
cookie.load(&[] as &[&str])
|
|
.map_err(|e| anyhow::anyhow!("Failed to load magic database: {}", e))?;
|
|
self.cookie = Some(cookie);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn finalize(&mut self, conn: &Connection) -> Result<()> {
|
|
// Save all magic metadata if not already saved
|
|
if !self.is_saved {
|
|
if let Err(e) = self.save_all_magic_metadata(conn) {
|
|
eprintln!("Warning: Failed to save magic metadata: {}", e);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn update(&mut self, data: &[u8], conn: &Connection) {
|
|
// Only collect up to max_buffer_size
|
|
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
|
|
if remaining_capacity > 0 {
|
|
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
|
|
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
|
|
|
|
// Check if we've reached our buffer limit and save if so
|
|
if self.buffer.len() >= self.max_buffer_size && !self.is_saved {
|
|
if let Err(e) = self.save_all_magic_metadata(conn) {
|
|
eprintln!("Warning: Failed to save magic metadata early: {}", e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn meta_name(&self) -> String {
|
|
"magic_file".to_string()
|
|
}
|
|
|
|
fn configure_options(&mut self, options: &std::collections::HashMap<String, serde_yaml::Value>) -> Result<()> {
|
|
if let Some(max_buffer_size) = options.get("max_buffer_size") {
|
|
if let Some(size) = max_buffer_size.as_u64() {
|
|
self.max_buffer_size = size as usize;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
|
&self.outputs
|
|
}
|
|
|
|
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
|
&mut self.outputs
|
|
}
|
|
|
|
fn default_outputs(&self) -> Vec<String> {
|
|
vec!["mime_type".to_string(), "mime_encoding".to_string(), "file_type".to_string()]
|
|
}
|
|
|
|
fn default_options(&self) -> std::collections::HashMap<String, serde_yaml::Value> {
|
|
let mut options = std::collections::HashMap::new();
|
|
options.insert("max_buffer_size".to_string(), serde_yaml::Value::Number(4096.into()));
|
|
options
|
|
}
|
|
}
|
|
|