feat: add max_buffer_size to MagicFileMetaPlugin and refactor MetaPluginProgram
Co-authored-by: aider (openai/andrew/openrouter/anthropic/claude-sonnet-4) <aider@aider.chat>
This commit is contained in:
@@ -1,52 +1,56 @@
|
||||
use anyhow::Result;
|
||||
use magic::{Cookie, CookieFlags};
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use rusqlite::Connection;
|
||||
|
||||
use crate::meta_plugin::MetaPlugin;
|
||||
|
||||
/// Meta plugin that buffers item data and inspects it with libmagic (`magic::Cookie`).
///
/// NOTE(review): two `derive` attributes appear because this text is a commit diff
/// rendering; presumably `#[derive(Debug)]` replaces `#[derive(Debug, Clone)]` — confirm.
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub struct MagicFileMetaPlugin {
|
||||
// Bytes accumulated by `update`; analyzed by `get_magic_result`.
buffer: Vec<u8>,
|
||||
// Cap used by `update` when appending to `buffer`; `new` sets it to 4096.
max_buffer_size: usize,
|
||||
// Checked by `update` before it triggers an early save; `new` starts it as `false`.
is_saved: bool,
|
||||
// Item id recorded by `initialize`.
item_id: Option<i64>,
|
||||
// Raw pointer to the connection passed to `initialize`.
// NOTE(review): nothing visible here ties the pointer's validity to this struct's lifetime.
conn: Option<*mut Connection>,
|
||||
// libmagic cookie opened and loaded by `initialize`; read by `get_magic_result`.
cookie: Option<Cookie>,
|
||||
}
|
||||
|
||||
impl MagicFileMetaPlugin {
|
||||
/// Creates a plugin with an empty buffer, a 4096-byte buffer cap, `is_saved` set to
/// `false`, and no item id, connection, or magic cookie attached yet.
pub fn new() -> MagicFileMetaPlugin {
|
||||
MagicFileMetaPlugin {
|
||||
buffer: Vec::new(),
|
||||
max_buffer_size: 4096, // Same as BinaryMetaPlugin
|
||||
is_saved: false,
|
||||
item_id: None,
|
||||
conn: None,
|
||||
cookie: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyzes `self.buffer` with a libmagic cookie and returns the cleaned-up description.
///
/// The raw result is trimmed. If it contains `;`, only the text before the first `;` is
/// kept; otherwise, if it contains `,` and `flags` contains both `MIME_TYPE` and
/// `MIME_ENCODING`, only the text before the first `,` is kept.
///
/// # Errors
/// Failures from libmagic are mapped to `io::ErrorKind::Other` with a descriptive message.
///
/// NOTE(review): this text is a commit diff rendering, so pre-change and post-change lines
/// are interleaved and the body is not compilable as written. Presumably the post-change
/// version uses the cached `self.cookie` and returns the "Magic cookie not initialized"
/// error when it is `None`, while the pre-change version opened and loaded a fresh cookie
/// on every call — confirm against the repository.
fn get_magic_result(&self, flags: CookieFlags) -> io::Result<String> {
|
||||
let cookie = Cookie::open(flags)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to open magic cookie: {}", e)))?;
|
||||
if let Some(ref cookie) = self.cookie {
|
||||
let result = cookie.buffer(&self.buffer)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
|
||||
|
||||
cookie.load(&[] as &[&str])
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to load magic database: {}", e)))?;
|
||||
// Clean up the result - remove extra whitespace and take first part if needed
|
||||
let trimmed = result.trim();
|
||||
|
||||
let result = cookie.buffer(&self.buffer)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to analyze buffer: {}", e)))?;
|
||||
// For some magic results, we might want just the first part before semicolon or comma
|
||||
let cleaned = if trimmed.contains(';') {
|
||||
trimmed.split(';').next().unwrap_or(trimmed).trim()
|
||||
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
|
||||
trimmed.split(',').next().unwrap_or(trimmed).trim()
|
||||
} else {
|
||||
trimmed
|
||||
};
|
||||
|
||||
// Clean up the result - remove extra whitespace and take first part if needed
|
||||
let trimmed = result.trim();
|
||||
|
||||
// For some magic results, we might want just the first part before semicolon or comma
|
||||
let cleaned = if trimmed.contains(';') {
|
||||
trimmed.split(';').next().unwrap_or(trimmed).trim()
|
||||
} else if trimmed.contains(',') && flags.contains(CookieFlags::MIME_TYPE | CookieFlags::MIME_ENCODING) {
|
||||
trimmed.split(',').next().unwrap_or(trimmed).trim()
|
||||
Ok(cleaned.to_string())
|
||||
} else {
|
||||
trimmed
|
||||
};
|
||||
|
||||
Ok(cleaned.to_string())
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
"Magic cookie not initialized".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn save_all_magic_metadata(&mut self) -> Result<()> {
|
||||
@@ -94,14 +98,17 @@ impl MetaPlugin for MagicFileMetaPlugin {
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns a boxed `DummyWriter` wrapped in `Ok`; this function has no error path.
fn create(&self) -> Result<Box<dyn Write>> {
|
||||
// For meta plugins, we don't actually create a writer since we're buffering data internally
|
||||
Ok(Box::new(DummyWriter))
|
||||
}
|
||||
|
||||
/// Records `item_id` and a raw pointer to `conn`, then opens a libmagic cookie with empty
/// flags, loads its database, and stores the cookie in `self.cookie`.
///
/// # Errors
/// Returns an `anyhow` error if the cookie cannot be opened or the database cannot be loaded.
fn initialize(&mut self, conn: &Connection, item_id: i64) -> Result<()> {
|
||||
self.item_id = Some(item_id);
|
||||
// NOTE(review): the shared borrow is stored as a raw `*mut` pointer; nothing in this
// block keeps `conn` alive for as long as the pointer is held — confirm callers do.
self.conn = Some(conn as *const Connection as *mut Connection);
|
||||
|
||||
// Initialize magic cookie
|
||||
let cookie = Cookie::open(CookieFlags::empty())
|
||||
.map_err(|e| anyhow::anyhow!("Failed to open magic cookie: {}", e))?;
|
||||
// An empty list of database paths is passed — presumably this selects libmagic's
// default database; confirm against the `magic` crate documentation.
cookie.load(&[] as &[&str])
|
||||
.map_err(|e| anyhow::anyhow!("Failed to load magic database: {}", e))?;
|
||||
self.cookie = Some(cookie);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -117,7 +124,21 @@ impl MetaPlugin for MagicFileMetaPlugin {
|
||||
}
|
||||
|
||||
/// Appends incoming `data` to the internal buffer, collecting at most `max_buffer_size`
/// bytes, and attempts an early save once the cap is reached.
///
/// When the buffer has reached the cap, `is_saved` is false, and both a connection and an
/// item id are set, `save_all_magic_metadata` is called; a failure is only reported as a
/// warning on stderr.
fn update(&mut self, data: &[u8]) {
|
||||
// NOTE(review): this unconditional append looks like the pre-change line of the commit
// diff; combined with the capped append below it would add `data` twice — confirm.
self.buffer.extend_from_slice(data);
|
||||
// Only collect up to max_buffer_size
|
||||
let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len());
|
||||
if remaining_capacity > 0 {
|
||||
let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity);
|
||||
self.buffer.extend_from_slice(&data[..bytes_to_copy]);
|
||||
|
||||
// Check if we've reached our buffer limit and save if so
|
||||
if self.buffer.len() >= self.max_buffer_size && !self.is_saved {
|
||||
// `conn` and `item_id` are bound only to check that both are set; neither is used below.
if let (Some(conn), Some(item_id)) = (self.conn, self.item_id) {
|
||||
if let Err(e) = self.save_all_magic_metadata() {
|
||||
eprintln!("Warning: Failed to save magic metadata early: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn meta_name(&mut self) -> String {
|
||||
@@ -125,16 +146,3 @@ impl MetaPlugin for MagicFileMetaPlugin {
|
||||
}
|
||||
}
|
||||
|
||||
// Dummy writer that implements Write but doesn't do anything
|
||||
// This is needed to satisfy the MetaPlugin trait requirements
|
||||
struct DummyWriter;
|
||||
|
||||
impl Write for DummyWriter {
|
||||
// Reports the whole of `buf` as written without storing it anywhere.
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
// Always returns `Ok(())`.
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
use crate::plugins::ProgramWriter;
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use log::*;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::process::{Command, Stdio};
|
||||
use std::process::{Command, Stdio, Child};
|
||||
use which::which;
|
||||
|
||||
use crate::meta_plugin::MetaPlugin;
|
||||
|
||||
/// Meta plugin that obtains a metadata value by running an external program.
///
/// NOTE(review): this text is a commit diff rendering. The two `derive` attributes, and
/// presumably the `buffer` field versus the `process`/`writer` fields, come from different
/// sides of the diff — confirm against the repository.
#[derive(Clone, Debug)]
|
||||
#[derive(Debug)]
|
||||
pub struct MetaPluginProgram {
|
||||
// Program name handed to `Command::new` in `finalize`.
pub program: String,
|
||||
// Arguments handed to `Command::args` in `finalize`.
pub args: Vec<String>,
|
||||
// NOTE(review): presumably whether the program is available on this system — confirm.
pub supported: bool,
|
||||
// NOTE(review): presumably the metadata key this plugin reports under — confirm.
pub meta_name: String,
|
||||
// When true, `finalize` returns only the first whitespace-separated token of the output.
pub split_whitespace: bool,
|
||||
// Bytes appended by `update` and written to the child's stdin in `finalize`.
buffer: Vec<u8>,
|
||||
// Child process stored by `initialize` and taken by `finalize`.
process: Option<Child>,
|
||||
// Boxed child stdin stored by `initialize`; `update` writes to it and `finalize` drops it.
writer: Option<Box<dyn Write>>,
|
||||
}
|
||||
|
||||
impl MetaPluginProgram {
|
||||
@@ -29,7 +29,8 @@ impl MetaPluginProgram {
|
||||
supported,
|
||||
meta_name,
|
||||
split_whitespace,
|
||||
buffer: Vec::new(),
|
||||
process: None,
|
||||
writer: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -43,8 +44,8 @@ impl MetaPlugin for MetaPluginProgram {
|
||||
false
|
||||
}
|
||||
|
||||
fn create(&self) -> Result<Box<dyn Write>> {
|
||||
debug!("META: Writing using {:?}", *self);
|
||||
fn initialize(&mut self, _conn: &rusqlite::Connection, _item_id: i64) -> Result<()> {
|
||||
debug!("META: Initializing program plugin: {:?}", self);
|
||||
|
||||
let program = self.program.clone();
|
||||
let args = self.args.clone();
|
||||
@@ -55,6 +56,7 @@ impl MetaPlugin for MetaPluginProgram {
|
||||
.args(args.clone())
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.context(anyhow!(
|
||||
"Problem spawning child process: {:?} {:?}",
|
||||
@@ -62,58 +64,59 @@ impl MetaPlugin for MetaPluginProgram {
|
||||
args
|
||||
))?;
|
||||
|
||||
Ok(Box::new(ProgramWriter {
|
||||
stdin: process.stdin.take().unwrap(),
|
||||
}))
|
||||
let stdin = process.stdin.take().unwrap();
|
||||
self.writer = Some(Box::new(stdin));
|
||||
self.process = Some(process);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collects the external program's output and returns it as the metadata value.
///
/// On a successful exit status, stdout is decoded lossily as UTF-8 and trimmed. When
/// `split_whitespace` is set, only the first whitespace-separated token is returned (or the
/// trimmed text itself if there are no tokens).
///
/// # Errors
/// Returns `io::ErrorKind::Other` when spawning, writing to stdin, or waiting fails, and
/// "Command failed: <stderr>" when the exit status is not successful.
///
/// NOTE(review): this text is a commit diff rendering, so pre-change and post-change lines
/// are interleaved and the body is not compilable as written. Presumably the post-change
/// version drops `self.writer` to close stdin, waits on the child stored in `self.process`,
/// and returns "No process to finalize" when there is none, while the pre-change version
/// spawned a new process here and wrote `self.buffer` to its stdin — confirm against the
/// repository.
fn finalize(&mut self) -> io::Result<String> {
|
||||
let program = self.program.clone();
|
||||
let args = self.args.clone();
|
||||
debug!("META: Finalizing program plugin");
|
||||
|
||||
debug!("META: Executing command for finalize: {:?} {:?}", program, args);
|
||||
// Close stdin to signal EOF to the process
|
||||
self.writer.take();
|
||||
|
||||
let mut process = Command::new(program)
|
||||
.args(args)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to spawn process: {}", e)))?;
|
||||
if let Some(mut process) = self.process.take() {
|
||||
let output = process.wait_with_output()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?;
|
||||
|
||||
let stdin = process.stdin.as_mut().unwrap();
|
||||
stdin.write_all(&self.buffer)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to write to stdin: {}", e)))?;
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let trimmed_result = stdout.trim();
|
||||
|
||||
let output = process.wait_with_output()
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("Failed to wait for process: {}", e)))?;
|
||||
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let trimmed_result = stdout.trim();
|
||||
|
||||
// For certain programs, we only want the first part before whitespace
|
||||
if self.split_whitespace {
|
||||
let parts: Vec<&str> = trimmed_result.split_whitespace().collect();
|
||||
if !parts.is_empty() {
|
||||
Ok(parts[0].to_string())
|
||||
// For certain programs, we only want the first part before whitespace
|
||||
if self.split_whitespace {
|
||||
let parts: Vec<&str> = trimmed_result.split_whitespace().collect();
|
||||
if !parts.is_empty() {
|
||||
Ok(parts[0].to_string())
|
||||
} else {
|
||||
Ok(trimmed_result.to_string())
|
||||
}
|
||||
} else {
|
||||
Ok(trimmed_result.to_string())
|
||||
}
|
||||
} else {
|
||||
Ok(trimmed_result.to_string())
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("Command failed: {}", stderr.trim()),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("Command failed: {}", stderr.trim()),
|
||||
"No process to finalize".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Forwards `data` to the stored writer, if one is set; a write failure is only logged at
/// debug level and is not returned to the caller.
fn update(&mut self, data: &[u8]) {
|
||||
// NOTE(review): this append to `self.buffer` looks like the pre-change line of the
// commit diff (the version that buffered data instead of streaming it) — confirm.
self.buffer.extend_from_slice(data);
|
||||
if let Some(ref mut writer) = self.writer {
|
||||
if let Err(e) = writer.write_all(data) {
|
||||
debug!("META: Failed to write to process stdin: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn meta_name(&mut self) -> String {
|
||||
|
||||
Reference in New Issue
Block a user