feat: expand binary detection to include common Linux file types

Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-11 11:34:52 -03:00
parent dc550c3f35
commit 68d182ee0b

View File

@@ -82,11 +82,26 @@ impl BinaryMetaPlugin {
if data.len() >= 4 { if data.len() >= 4 {
// Check for common binary file headers // Check for common binary file headers
let headers = [ let headers = [
// Image formats
&[0x89, 0x50, 0x4E, 0x47], // PNG &[0x89, 0x50, 0x4E, 0x47], // PNG
&[0xFF, 0xD8, 0xFF, 0xE0], // JPEG &[0xFF, 0xD8, 0xFF, 0xE0], // JPEG
&[0x47, 0x49, 0x46, 0x38], // GIF
&[0x42, 0x4D], // BMP
// Document formats
&[0x25, 0x50, 0x44, 0x46], // PDF &[0x25, 0x50, 0x44, 0x46], // PDF
// Archive formats
&[0x50, 0x4B, 0x03, 0x04], // ZIP &[0x50, 0x4B, 0x03, 0x04], // ZIP
&[0x52, 0x61, 0x72, 0x21], // RAR &[0x52, 0x61, 0x72, 0x21], // RAR
&[0x1F, 0x8B], // GZIP
&[0x42, 0x5A, 0x68], // BZIP2
&[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00], // XZ
// TAR is not listed here: its "ustar" magic (when present) sits at offset 257, not at the start of the file
// Executables and object files
&[0x7F, 0x45, 0x4C, 0x46], // ELF
&[0x4D, 0x5A], // Windows PE
// Compressed formats
&[0x1F, 0x9D], // LZW compressed
&[0x1F, 0xA0], // LZH compressed
]; ];
for header in &headers { for header in &headers {
@@ -94,6 +109,28 @@ impl BinaryMetaPlugin {
return true; // Definitely binary return true; // Definitely binary
} }
} }
// Special case for TAR files (no consistent magic number)
// Check if it looks like a TAR header
if data.len() >= 512 {
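// TAR headers have a fixed layout:
// bytes 0..=99 hold the filename (null-terminated unless it fills the field)
// bytes 100..=107 hold the file mode (ASCII octal digits)
// NOTE(review): the checks below expect a null at offset 100, which by this layout is the first mode byte — confirm the intended offsets
// If we see this pattern, it's likely a TAR file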
let has_tar_structure =
data[0] != 0 && // First byte of filename should not be null
data[100] == 0 && // File mode should start with null
data[101] >= b'0' && data[101] <= b'7'; // File mode should be octal digit
if has_tar_structure {
return true;
}
}
}
// Check for AR format (used for static libraries)
if data.len() >= 8 && &data[0..8] == b"!<arch>\n" {
return true;
} }
// Count printable characters as a fallback // Count printable characters as a fallback