feat: add max_buffer_size to MagicFileMetaPlugin and refactor MetaPluginProgram

Co-authored-by: aider (openai/andrew/openrouter/anthropic/claude-sonnet-4) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-18 07:56:56 -03:00
parent 133538881f
commit c38ae0d4a9
2 changed files with 91 additions and 80 deletions

View File

@@ -1,52 +1,56 @@
use anyhow::Result; use anyhow::Result;
use magic::{Cookie, CookieFlags}; use magic::{Cookie, CookieFlags};
use std::io; use std::io;
use std::io::Write;
use rusqlite::Connection; use rusqlite::Connection;
use crate::meta_plugin::MetaPlugin; use crate::meta_plugin::MetaPlugin;
#[derive(Debug, Clone)] #[derive(Debug)]
pub struct MagicFileMetaPlugin { pub struct MagicFileMetaPlugin {
buffer: Vec<u8>, buffer: Vec<u8>,
max_buffer_size: usize,
is_saved: bool, is_saved: bool,
item_id: Option<i64>, item_id: Option<i64>,
conn: Option<*mut Connection>, conn: Option<*mut Connection>,
cookie: Option<Cookie>,
} }
impl MagicFileMetaPlugin { impl MagicFileMetaPlugin {
pub fn new() -> MagicFileMetaPlugin { pub fn new() -> MagicFileMetaPlugin {
MagicFileMetaPlugin { MagicFileMetaPlugin {
buffer: Vec::new(), buffer: Vec::new(),
max_buffer_size: 4096, // Same as BinaryMetaPlugin
is_saved: false, is_saved: false,
item_id: None, item_id: None,
conn: None, conn: None,
cookie: None,
} }
} }
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> { fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
let cookie = Cookie::open(flags) if let Some(ref cookie) = self.cookie {
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to open magic cookie: {}", e)))?; let result = cookie.buffer(&self.buffer)
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
cookie.load(&[] as &[&str]) // Clean up the result - remove extra whitespace and take first part if needed
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to load magic database: {}", e)))?; let trimmed = result.trim();
// For some magic results, we might want just the first part before semicolon or comma
let cleaned = if trimmed.contains(';') {
trimmed.split(';').next().unwrap_or(trimmed).trim()
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
trimmed.split(',').next().unwrap_or(trimmed).trim()
} else {
trimmed
};
let result = cookie.buffer(&self.buffer) Ok(cleaned.to_string())
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
// Clean up the result - remove extra whitespace and take first part if needed
let trimmed = result.trim();
// For some magic results, we might want just the first part before semicolon or comma
let cleaned = if trimmed.contains(';') {
trimmed.split(';').next().unwrap_or(trimmed).trim()
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
trimmed.split(',').next().unwrap_or(trimmed).trim()
} else { } else {
trimmed Err(io::Error::new(
}; io::ErrorKind::Other,
"Magic cookie not initialized".to_string(),
Ok(cleaned.to_string()) ))
}
} }
fn save_all_magic_metadata(&mut self) -> Result<()> { fn save_all_magic_metadata(&mut self) -> Result<()> {
@@ -94,14 +98,17 @@ impl MetaPlugin for MagicFileMetaPlugin {
true true
} }
fn create(&self) -> Result<Box<dyn Write>> {
// For meta plugins, we don't actually create a writer since we're buffering data internally
Ok(Box::new(DummyWriter))
}
fn initialize(&mut self, conn: &Connection, item_id: i64) -> Result<()> { fn initialize(&mut self, conn: &Connection, item_id: i64) -> Result<()> {
self.item_id = Some(item_id); self.item_id = Some(item_id);
self.conn = Some(conn as *const Connection as *mut Connection); self.conn = Some(conn as *const Connection as *mut Connection);
// Initialize magic cookie
let cookie = Cookie::open(CookieFlags::empty())
.map_err(|e| anyhow::anyhow!("Failed to open magic cookie: {}", e))?;
cookie.load(&[] as &[&str])
.map_err(|e| anyhow::anyhow!("Failed to load magic database: {}", e))?;
self.cookie = Some(cookie);
Ok(()) Ok(())
} }
@@ -117,7 +124,21 @@ impl MetaPlugin for MagicFileMetaPlugin {
} }
fn update(&mut self, data: &[u8]) { fn update(&mut self, data: &[u8]) {
self.buffer.extend_from_slice(data); // Only collect up to max_buffer_size
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
if remaining_capacity > 0 {
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
// Check if we've reached our buffer limit and save if so
if self.buffer.len() >= self.max_buffer_size && !self.is_saved {
if let (Some(conn), Some(item_id)) = (self.conn, self.item_id) {
if let Err(e) = self.save_all_magic_metadata() {
eprintln!("Warning: Failed to save magic metadata early: {}", e);
}
}
}
}
} }
fn meta_name(&mut self) -> String { fn meta_name(&mut self) -> String {
@@ -125,16 +146,3 @@ impl MetaPlugin for MagicFileMetaPlugin {
} }
} }
/// No-op sink used where the `MetaPlugin` trait requires a writer.
///
/// Every write is reported as fully consumed while the bytes themselves are
/// discarded; flushing always succeeds.
struct DummyWriter;

impl Write for DummyWriter {
    /// Reports all of `bytes` as written without storing any of them.
    fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
        let accepted = bytes.len();
        Ok(accepted)
    }

    /// Nothing is ever buffered, so there is nothing to flush.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

View File

@@ -1,21 +1,21 @@
use crate::plugins::ProgramWriter;
use anyhow::{Context, Result, anyhow}; use anyhow::{Context, Result, anyhow};
use log::*; use log::*;
use std::io; use std::io;
use std::io::Write; use std::io::Write;
use std::process::{Command, Stdio}; use std::process::{Command, Stdio, Child};
use which::which; use which::which;
use crate::meta_plugin::MetaPlugin; use crate::meta_plugin::MetaPlugin;
#[derive(Clone, Debug)] #[derive(Debug)]
pub struct MetaPluginProgram { pub struct MetaPluginProgram {
pub program: String, pub program: String,
pub args: Vec<String>, pub args: Vec<String>,
pub supported: bool, pub supported: bool,
pub meta_name: String, pub meta_name: String,
pub split_whitespace: bool, pub split_whitespace: bool,
buffer: Vec<u8>, process: Option<Child>,
writer: Option<Box<dyn Write>>,
} }
impl MetaPluginProgram { impl MetaPluginProgram {
@@ -29,7 +29,8 @@ impl MetaPluginProgram {
supported, supported,
meta_name, meta_name,
split_whitespace, split_whitespace,
buffer: Vec::new(), process: None,
writer: None,
} }
} }
} }
@@ -43,8 +44,8 @@ impl MetaPlugin for MetaPluginProgram {
false false
} }
fn create(&self) -> Result<Box<dyn Write>> { fn initialize(&mut self, _conn: &rusqlite::Connection, _item_id: i64) -> Result<()> {
debug!("META: Writing using {:?}", *self); debug!("META: Initializing program plugin: {:?}", self);
let program = self.program.clone(); let program = self.program.clone();
let args = self.args.clone(); let args = self.args.clone();
@@ -55,6 +56,7 @@ impl MetaPlugin for MetaPluginProgram {
.args(args.clone()) .args(args.clone())
.stdin(Stdio::piped()) .stdin(Stdio::piped())
.stdout(Stdio::piped()) .stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn() .spawn()
.context(anyhow!( .context(anyhow!(
"Problem spawning child process: {:?} {:?}", "Problem spawning child process: {:?} {:?}",
@@ -62,58 +64,59 @@ impl MetaPlugin for MetaPluginProgram {
args args
))?; ))?;
Ok(Box::new(ProgramWriter { let stdin = process.stdin.take().unwrap();
stdin: process.stdin.take().unwrap(), self.writer = Some(Box::new(stdin));
})) self.process = Some(process);
Ok(())
} }
fn finalize(&mut self) -> io::Result<String> { fn finalize(&mut self) -> io::Result<String> {
let program = self.program.clone(); debug!("META: Finalizing program plugin");
let args = self.args.clone();
debug!("META: Executing command for finalize: {:?} {:?}", program, args); // Close stdin to signal EOF to the process
self.writer.take();
let mut process = Command::new(program) if let Some(mut process) = self.process.take() {
.args(args) let output = process.wait_with_output()
.stdin(Stdio::piped()) .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?;
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn process: {}", e)))?;
let stdin = process.stdin.as_mut().unwrap(); if output.status.success() {
stdin.write_all(&self.buffer) let stdout = String::from_utf8_lossy(&output.stdout);
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to write to stdin: {}", e)))?; let trimmed_result = stdout.trim();
let output = process.wait_with_output() // For certain programs, we only want the first part before whitespace
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?; if self.split_whitespace {
let parts: Vec<&str> = trimmed_result.split_whitespace().collect();
if output.status.success() { if !parts.is_empty() {
let stdout = String::from_utf8_lossy(&output.stdout); Ok(parts[0].to_string())
let trimmed_result = stdout.trim(); } else {
Ok(trimmed_result.to_string())
// For certain programs, we only want the first part before whitespace }
if self.split_whitespace {
let parts: Vec<&str> = trimmed_result.split_whitespace().collect();
if !parts.is_empty() {
Ok(parts[0].to_string())
} else { } else {
Ok(trimmed_result.to_string()) Ok(trimmed_result.to_string())
} }
} else { } else {
Ok(trimmed_result.to_string()) let stderr = String::from_utf8_lossy(&output.stderr);
Err(io::Error::new(
io::ErrorKind::Other,
format!("Command failed: {}", stderr.trim()),
))
} }
} else { } else {
let stderr = String::from_utf8_lossy(&output.stderr);
Err(io::Error::new( Err(io::Error::new(
io::ErrorKind::Other, io::ErrorKind::Other,
format!("Command failed: {}", stderr.trim()), "No process to finalize".to_string(),
)) ))
} }
} }
fn update(&mut self, data: &[u8]) { fn update(&mut self, data: &[u8]) {
self.buffer.extend_from_slice(data); if let Some(ref mut writer) = self.writer {
if let Err(e) = writer.write_all(data) {
debug!("META: Failed to write to process stdin: {}", e);
}
}
} }
fn meta_name(&mut self) -> String { fn meta_name(&mut self) -> String {