Files
keep/src/meta_plugin/magic.rs
Andrew Phillips 1b6ff44312 fix: correct magic file type and mime encoding detection
Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
2025-08-18 08:50:01 -03:00

158 lines
5.6 KiB
Rust

use anyhow::Result;
use magic::{Cookie, CookieFlags};
use rusqlite::Connection;
use std::io;
use crate::meta_plugin::MetaPlugin;
/// Meta plugin that runs libmagic over the first bytes of an item and stores
/// the detected file type / MIME information as metadata rows.
///
/// Data is accumulated through `update`; detection and storage happen once the
/// buffer is full or, at the latest, in `finalize`.
#[derive(Debug)]
pub struct MagicFileMetaPlugin {
/// Leading bytes of the item collected so far (never grows past `max_buffer_size`).
buffer: Vec<u8>,
/// Upper bound on how many bytes are kept in `buffer` for detection.
max_buffer_size: usize,
/// Set once the metadata has been written, so it is not written a second time.
is_saved: bool,
/// Id of the item the metadata belongs to; `None` until `initialize` is called.
item_id: Option<i64>,
/// Raw pointer to the connection handed to `initialize`; `None` until then.
/// It is dereferenced when saving, so that connection must still be alive at that point.
conn: Option<*mut Connection>,
/// NOTE(review): initialised to `None` and never assigned or read in the code
/// visible here (a fresh cookie is opened per detection) — confirm whether
/// this field is still needed.
cookie: Option<Cookie>,
}
impl MagicFileMetaPlugin {
    /// Creates a plugin with an empty buffer that is not yet bound to any
    /// item or database connection.
    pub fn new() -> MagicFileMetaPlugin {
        MagicFileMetaPlugin {
            buffer: Vec::new(),
            max_buffer_size: 4096, // Same as BinaryMetaPlugin
            is_saved: false,
            item_id: None,
            conn: None,
            cookie: None,
        }
    }

    /// Wraps a libmagic failure in an `io::Error`, prefixed with what was being attempted.
    fn magic_error(context: &str, err: impl std::fmt::Display) -> io::Error {
        io::Error::new(io::ErrorKind::Other, format!("{}: {}", context, err))
    }

    /// Returns the trimmed text of `text` up to (not including) the first `separator`,
    /// or all of `text` when the separator does not occur.
    fn first_segment(text: &str, separator: char) -> &str {
        text.split(separator).next().unwrap_or(text).trim()
    }

    /// Runs libmagic with `flags` over the buffered bytes and returns the
    /// cleaned-up result.
    ///
    /// A fresh cookie is opened and the default magic database is loaded on
    /// every call, so each request is evaluated with exactly the given flags.
    ///
    /// Clean-up rules:
    /// * The result is trimmed.
    /// * When both `MIME_TYPE` and `MIME_ENCODING` are requested (the combined
    ///   `type; charset=enc` form) the `;`-separated encoding part is kept.
    ///   Previously it was cut off, which made the combined value identical
    ///   to the plain MIME type. If there is no `;`, anything after the first
    ///   `,` is dropped.
    /// * For every other flag set, only the part before the first `;` is kept.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` if the cookie cannot be opened,
    /// the magic database cannot be loaded, or the buffer cannot be analysed.
    fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
        let cookie = Cookie::open(flags)
            .map_err(|e| Self::magic_error("Failed to open magic cookie", e))?;
        cookie
            .load(&[] as &[&str])
            .map_err(|e| Self::magic_error("Failed to load magic database", e))?;
        let result = cookie
            .buffer(&self.buffer)
            .map_err(|e| Self::magic_error("Failed to analyze buffer", e))?;

        let trimmed = result.trim();
        let wants_type_and_encoding =
            flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING);

        let cleaned = if !wants_type_and_encoding {
            Self::first_segment(trimmed, ';')
        } else if trimmed.contains(';') {
            // Combined form: the text after ';' is the encoding we were asked for.
            trimmed
        } else {
            Self::first_segment(trimmed, ',')
        };
        Ok(cleaned.to_string())
    }

    /// Detects and stores all magic-derived metadata for the current item.
    ///
    /// Does nothing (and leaves `is_saved` untouched) when the plugin has not
    /// been given both a connection and an item id. Otherwise it writes, in
    /// this order, `magic_file_type`, `magic_mime_type`, `magic_mime_encoding`
    /// and `magic_mime_combined`, then marks the plugin as saved.
    ///
    /// Storage is best-effort: a detection that fails or yields an empty
    /// string is skipped, and the outcome of `store_meta` is ignored, so this
    /// currently always returns `Ok(())`.
    fn save_all_magic_metadata(&mut self) -> Result<()> {
        let (conn, item_id) = match (self.conn, self.item_id) {
            (Some(conn), Some(item_id)) => (conn, item_id),
            _ => return Ok(()),
        };

        // SAFETY: the only non-`None` value ever stored in `self.conn` is the
        // pointer derived from the `&Connection` passed to `initialize`. This
        // dereference is sound only while that connection is still alive; the
        // code driving the plugin must guarantee that. Only a shared reference
        // is created from the pointer.
        let conn_ref = unsafe { &*conn };

        // Metadata name paired with the libmagic flags that produce its value.
        let detections = [
            ("magic_file_type", CookieFlags::empty()),
            ("magic_mime_type", CookieFlags::MIME_TYPE),
            ("magic_mime_encoding", CookieFlags::MIME_ENCODING),
            // Type and encoding together.
            ("magic_mime_combined", CookieFlags::MIME),
        ];

        for &(name, flags) in detections.iter() {
            match self.get_magic_result(flags) {
                Ok(value) if !value.is_empty() => {
                    let meta = crate::db::Meta {
                        id: item_id,
                        name: name.to_string(),
                        value,
                    };
                    // Best-effort: one failed row must not prevent the others.
                    let _ = crate::db::store_meta(conn_ref, meta);
                }
                _ => {}
            }
        }

        self.is_saved = true;
        Ok(())
    }
}
impl MetaPlugin for MagicFileMetaPlugin {
    /// This plugin reports itself as internal.
    fn is_internal(&self) -> bool {
        true
    }

    /// Binds the plugin to `item_id` and remembers the connection used to
    /// store metadata later on.
    fn initialize(&mut self, conn: &Connection, item_id: i64) -> Result<()> {
        // The connection is kept as a raw pointer because the plugin cannot
        // hold the borrow itself; it is dereferenced again when saving, so the
        // caller has to keep the connection alive until then.
        let raw_conn = conn as *const _ as *mut Connection;
        self.conn = Some(raw_conn);
        self.item_id = Some(item_id);
        Ok(())
    }

    /// Writes the metadata if that has not happened yet. A failure is only
    /// reported on stderr; the call itself always succeeds.
    fn finalize(&mut self) -> Result<()> {
        if self.is_saved {
            return Ok(());
        }
        if let Err(e) = self.save_all_magic_metadata() {
            eprintln!("Warning: Failed to save magic metadata: {}", e);
        }
        Ok(())
    }

    /// Appends `data` to the detection buffer until `max_buffer_size` bytes
    /// have been collected; anything beyond that is ignored. As soon as the
    /// buffer is full the metadata is saved right away.
    fn update(&mut self, data: &[u8]) {
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        if room == 0 {
            // Buffer already full: nothing more to collect.
            return;
        }

        let take = data.len().min(room);
        self.buffer.extend_from_slice(&data[..take]);

        // Save early only once the limit is reached and nothing was saved before.
        if self.is_saved || self.buffer.len() < self.max_buffer_size {
            return;
        }
        if let Err(e) = self.save_all_magic_metadata() {
            eprintln!("Warning: Failed to save magic metadata early: {}", e);
        }
    }

    /// Name under which this plugin is known.
    fn meta_name(&mut self) -> String {
        String::from("magic_file")
    }
}