diff --git a/src/meta_plugin/system.rs b/src/meta_plugin/system.rs index 5692b25..69f1f29 100644 --- a/src/meta_plugin/system.rs +++ b/src/meta_plugin/system.rs @@ -15,6 +15,126 @@ pub struct CwdMetaPlugin { meta_name: String, } +#[derive(Debug, Clone, Default)] +pub struct BinaryMetaPlugin { + meta_name: String, + buffer: Vec, + max_buffer_size: usize, +} + +impl BinaryMetaPlugin { + pub fn new() -> BinaryMetaPlugin { + BinaryMetaPlugin { + meta_name: "binary".to_string(), + buffer: Vec::new(), + max_buffer_size: 4096, // 4KB + } + } + + /// Detect if data is binary or text + /// Returns true if data is likely binary, false if likely text + fn is_binary(data: &[u8]) -> bool { + if data.is_empty() { + return false; + } + + // Check if it's valid UTF-8 + if std::str::from_utf8(data).is_ok() { + // Valid UTF-8, but might still be binary + // Check if it's UTF-16 + if data.len() >= 2 { + // Check for BOM + if (data[0] == 0xFF && data[1] == 0xFE) || (data[0] == 0xFE && data[1] == 0xFF) { + // UTF-16 with BOM is text + return false; + } + } + + // Count printable characters + let printable_count = data.iter().filter(|&&b| { + b.is_ascii_alphanumeric() || + b.is_ascii_punctuation() || + b.is_ascii_whitespace() || + b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' + }).count(); + + // If less than 70% of bytes are printable, consider it binary + let printable_ratio = printable_count as f64 / data.len() as f64; + return printable_ratio < 0.7; + } else { + // Not valid UTF-8, likely binary + // But check if it might be UTF-16 without BOM + if data.len() >= 2 && data.len() % 2 == 0 { + // Check if it looks like UTF-16 (every other byte is 0) + let mut zero_count = 0; + for (i, &byte) in data.iter().enumerate() { + if i % 2 == 1 && byte == 0 { + zero_count += 1; + } + } + // If more than 50% of odd positions are zero, might be UTF-16 + if zero_count as f64 / (data.len() / 2) as f64 > 0.5 { + return false; // Likely UTF-16 text + } + } + + // Check for common binary file signatures + if data.len() >= 4 { + // Check for common binary file headers + let headers = [ + &[0x89, 0x50, 0x4E, 0x47], // PNG + &[0xFF, 0xD8, 0xFF, 0xE0], // JPEG + &[0x25, 0x50, 0x44, 0x46], // PDF + &[0x50, 0x4B, 0x03, 0x04], // ZIP + &[0x52, 0x61, 0x72, 0x21], // RAR + ]; + + for header in &headers { + if data.starts_with(header) { + return true; // Definitely binary + } + } + } + + // Count printable characters as a fallback + let printable_count = data.iter().filter(|&&b| { + b.is_ascii_alphanumeric() || + b.is_ascii_punctuation() || + b.is_ascii_whitespace() || + b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' + }).count(); + + // If less than 30% of bytes are printable, consider it binary + let printable_ratio = printable_count as f64 / data.len() as f64; + printable_ratio < 0.3 + } + } +} + +impl MetaPlugin for BinaryMetaPlugin { + fn create(&self) -> Result> { + Ok(Box::new(io::sink())) + } + + fn finalize(&mut self) -> io::Result { + let is_binary = Self::is_binary(&self.buffer); + Ok(if is_binary { "true".to_string() } else { "false".to_string() }) + } + + fn update(&mut self, data: &[u8]) { + // Only collect up to max_buffer_size + let remaining_capacity = self.max_buffer_size.saturating_sub(self.buffer.len()); + if remaining_capacity > 0 { + let bytes_to_copy = std::cmp::min(data.len(), remaining_capacity); + self.buffer.extend_from_slice(&data[..bytes_to_copy]); + } + } + + fn meta_name(&mut self) -> String { + self.meta_name.clone() + } +} + impl CwdMetaPlugin { pub fn new() -> CwdMetaPlugin { CwdMetaPlugin {