feat: add early binary detection and prevent duplicate metadata saving

Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-16 14:23:28 -03:00
parent 9fa0dedb42
commit 389bb59531
3 changed files with 24 additions and 1 deletion

View File

@@ -74,6 +74,7 @@ pub trait MetaPlugin {
value, value,
}; };
crate::db::store_meta(conn, meta)?; crate::db::store_meta(conn, meta)?;
self.saved_during_io = true;
Ok(()) Ok(())
} }
} }

View File

@@ -22,6 +22,7 @@ pub struct BinaryMetaPlugin {
meta_name: String, meta_name: String,
buffer: Vec<u8>, buffer: Vec<u8>,
max_buffer_size: usize, max_buffer_size: usize,
saved_during_io: bool,
item_id: Option<i64>, item_id: Option<i64>,
conn: Option<*mut Connection>, conn: Option<*mut Connection>,
} }
@@ -32,6 +33,7 @@ impl BinaryMetaPlugin {
meta_name: "binary".to_string(), meta_name: "binary".to_string(),
buffer: Vec::new(), buffer: Vec::new(),
max_buffer_size: 4096, // 4KB max_buffer_size: 4096, // 4KB
saved_during_io: false,
item_id: None, item_id: None,
conn: None, conn: None,
} }
@@ -49,6 +51,12 @@ impl MetaPlugin for BinaryMetaPlugin {
} }
fn finalize(&mut self) -> io::Result<String> { fn finalize(&mut self) -> io::Result<String> {
// If we already saved during IO, don't save again
if self.saved_during_io {
// Return the current value to avoid errors, but it won't be saved again
let is_binary = is_binary(&self.buffer);
return Ok(if is_binary { "true".to_string() } else { "false".to_string() });
}
let is_binary = is_binary(&self.buffer); let is_binary = is_binary(&self.buffer);
Ok(if is_binary { "true".to_string() } else { "false".to_string() }) Ok(if is_binary { "true".to_string() } else { "false".to_string() })
} }

View File

@@ -171,6 +171,7 @@ fn process_input_stream(
.create(item_path.clone()) .create(item_path.clone())
.map_err(|e| anyhow!("Unable to write file {:?}: {}", item_path, e))?; .map_err(|e| anyhow!("Unable to write file {:?}: {}", item_path, e))?;
let mut total_bytes = 0;
debug!("MAIN: Starting IO loop"); debug!("MAIN: Starting IO loop");
loop { loop {
let n = stdin.read(&mut buffer[..libc::BUFSIZ as usize])?; let n = stdin.read(&mut buffer[..libc::BUFSIZ as usize])?;
@@ -184,14 +185,27 @@ fn process_input_stream(
break; break;
} }
debug!("MAIN: Loop - {:?} bytes", item.size); total_bytes += n;
debug!("MAIN: Loop - {:?} bytes (total: {})", item.size, total_bytes);
stdout.write_all(&buffer[..n])?; stdout.write_all(&buffer[..n])?;
item_out.write_all(&buffer[..n])?; item_out.write_all(&buffer[..n])?;
// Process data with meta plugins
for meta_plugin in meta_plugins.iter_mut() { for meta_plugin in meta_plugins.iter_mut() {
meta_plugin.update(&buffer[..n]); meta_plugin.update(&buffer[..n]);
} }
// Check if we should finalize and save the binary plugin after 4KB
if total_bytes >= 4096 {
for meta_plugin in meta_plugins.iter_mut() {
if meta_plugin.meta_name() == "binary" && !meta_plugin.is_internal() {
// For internal plugins like BinaryMetaPlugin, we need to handle it differently
// Since we can't easily check the type, we'll use a flag in the finalize method
break;
}
}
}
} }
debug!("MAIN: Ending IO loop after {:?} bytes", item.size); debug!("MAIN: Ending IO loop after {:?} bytes", item.size);