From 68d182ee0b60c762f535b3618a79ee29d629cdbb Mon Sep 17 00:00:00 2001 From: Andrew Phillips Date: Mon, 11 Aug 2025 11:34:52 -0300 Subject: [PATCH] feat: expand binary detection to include common Linux file types Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) --- src/meta_plugin/system.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/meta_plugin/system.rs b/src/meta_plugin/system.rs index 69f1f29..1f62616 100644 --- a/src/meta_plugin/system.rs +++ b/src/meta_plugin/system.rs @@ -82,11 +82,26 @@ impl BinaryMetaPlugin { if data.len() >= 4 { // Check for common binary file headers let headers = [ + // Image formats &[0x89, 0x50, 0x4E, 0x47], // PNG &[0xFF, 0xD8, 0xFF, 0xE0], // JPEG + &[0x47, 0x49, 0x46, 0x38], // GIF + &[0x42, 0x4D], // BMP + // Document formats &[0x25, 0x50, 0x44, 0x46], // PDF + // Archive formats &[0x50, 0x4B, 0x03, 0x04], // ZIP &[0x52, 0x61, 0x72, 0x21], // RAR + &[0x1F, 0x8B], // GZIP + &[0x42, 0x5A, 0x68], // BZIP2 + &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], // XZ + // TAR has no magic number, but we can check for common patterns + // Executables and object files + &[0x7F, 0x45, 0x4C, 0x46], // ELF + &[0x4D, 0x5A], // Windows PE + // Compressed formats + &[0x1F, 0x9D], // LZW compressed + &[0x1F, 0xA0], // LZH compressed ]; for header in &headers { @@ -94,6 +109,28 @@ impl BinaryMetaPlugin { return true; // Definitely binary } } + + // Special case for TAR files (no consistent magic number) + // Check if it looks like a TAR header + if data.len() >= 512 { + // TAR headers have specific structure + // First 100 bytes are filename (null-terminated) + // Next 8 bytes are file mode (octal, null-terminated) + // If we see this pattern, it's likely a TAR file + let has_tar_structure = + data[0] != 0 && // First byte of filename should not be null + data[100] == 0 && // File mode should start with null + data[101] >= b'0' && data[101] <= b'7'; // File mode should be octal digit + + if 
has_tar_structure { + return true; + } + } + } + + // Check for AR format (used for static libraries) + if data.len() >= 8 && &data[0..8] == b"!<arch>\n" { + return true; } // Count printable characters as a fallback