Files
keep/src/meta_plugin/system.rs
Andrew Phillips 68d182ee0b feat: expand binary detection to include common Linux file types
Co-authored-by: aider (openai/andrew/openrouter/qwen/qwen3-coder) <aider@aider.chat>
2025-08-11 11:34:52 -03:00

519 lines
14 KiB
Rust

use anyhow::Result;
use gethostname::gethostname;
use local_ip_address::local_ip;
use dns_lookup::lookup_addr;
use std::io;
use std::io::Write;
use std::env;
use std::process;
use uzers::{get_current_uid, get_current_gid, get_current_username, get_current_groupname};
use crate::meta_plugin::MetaPlugin;
/// Metadata plugin whose `finalize` reports the process's current working directory.
///
/// Build it with `CwdMetaPlugin::new()`, which sets the metadata name to `"cwd"`;
/// the derived `Default` leaves the name empty.
#[derive(Debug, Clone, Default)]
pub struct CwdMetaPlugin {
/// Name returned by `meta_name()`.
meta_name: String,
}
/// Metadata plugin that classifies the captured stream as binary or text.
///
/// Only the first `max_buffer_size` bytes handed to `update` are kept and
/// inspected. Construct it with [`BinaryMetaPlugin::new`]: the derived
/// `Default` leaves `max_buffer_size` at 0, so a defaulted instance never
/// samples any input.
#[derive(Debug, Clone, Default)]
pub struct BinaryMetaPlugin {
    /// Name returned by `meta_name()` (`"binary"` when built via `new`).
    meta_name: String,
    /// Leading bytes of the stream, sampled for classification.
    buffer: Vec<u8>,
    /// Upper bound on the number of bytes kept in `buffer`.
    max_buffer_size: usize,
}

impl BinaryMetaPlugin {
    /// Creates a plugin named `"binary"` that samples up to 4 KiB of input.
    pub fn new() -> BinaryMetaPlugin {
        BinaryMetaPlugin {
            meta_name: "binary".to_string(),
            buffer: Vec::new(),
            max_buffer_size: 4096, // 4 KiB
        }
    }

    /// Heuristically decides whether `data` is binary (`true`) or text (`false`).
    ///
    /// Checks run in this order; the first one that matches decides:
    /// 1. empty input is text;
    /// 2. a UTF-16 byte-order mark means text;
    /// 3. an `ar` or ustar (`tar`) header means binary, even though such
    ///    headers are themselves plain ASCII;
    /// 4. input that decodes as UTF-8 (ignoring a multi-byte character cut off
    ///    at the very end of the sample) is binary when fewer than 70% of its
    ///    *characters* are printable;
    /// 5. otherwise a known magic number means binary;
    /// 6. otherwise input that looks like BOM-less UTF-16 is text;
    /// 7. otherwise the input is binary when fewer than 30% of its bytes are
    ///    printable ASCII.
    fn is_binary(data: &[u8]) -> bool {
        if data.is_empty() {
            return false;
        }

        // 0xFF / 0xFE never occur in valid UTF-8, so the BOM has to be tested
        // before any UTF-8 validation or it can never be seen.
        if data.starts_with(&[0xFF, 0xFE]) || data.starts_with(&[0xFE, 0xFF]) {
            return false;
        }

        if Self::is_ascii_framed_archive(data) {
            return true;
        }

        if let Some(text) = Self::decodable_utf8(data) {
            // Count characters rather than bytes: every byte of a multi-byte
            // UTF-8 character is >= 0x80 and would otherwise look unprintable.
            let (printable, total) = text.chars().fold((0usize, 0usize), |(p, t), c| {
                (p + usize::from(Self::is_text_char(c)), t + 1)
            });
            return Self::ratio(printable, total) < 0.7;
        }

        // A definite signature outranks the zero-byte heuristic below, which
        // zero-heavy headers (e.g. ELF) would otherwise satisfy.
        if Self::has_binary_signature(data) {
            return true;
        }

        if Self::looks_like_bomless_utf16(data) {
            return false;
        }

        let printable = data
            .iter()
            .filter(|b| b.is_ascii_graphic() || b.is_ascii_whitespace())
            .count();
        Self::ratio(printable, data.len()) < 0.3
    }

    /// Returns `data` as `&str` when it is valid UTF-8, or its valid prefix when
    /// the only problem is an incomplete multi-byte sequence at the very end
    /// (which happens when the sample limit cuts a character in half).
    fn decodable_utf8(data: &[u8]) -> Option<&str> {
        match std::str::from_utf8(data) {
            Ok(text) => Some(text),
            // `error_len() == None` means "input ended mid-character".
            Err(err) if err.error_len().is_none() && err.valid_up_to() > 0 => {
                std::str::from_utf8(&data[..err.valid_up_to()]).ok()
            }
            Err(_) => None,
        }
    }

    /// Whether `c` counts as ordinary text: ASCII graphic or ASCII whitespace,
    /// or any non-ASCII character that is not a control character.
    fn is_text_char(c: char) -> bool {
        if c.is_ascii() {
            c.is_ascii_graphic() || c.is_ascii_whitespace()
        } else {
            !c.is_control()
        }
    }

    /// Detects archive formats whose headers are plain ASCII and would
    /// otherwise pass for text: `ar` (global header `!<arch>\n`) and ustar
    /// `tar` (magic `ustar` at byte offset 257 of the first header block).
    fn is_ascii_framed_archive(data: &[u8]) -> bool {
        const USTAR_MAGIC_OFFSET: usize = 257;
        data.starts_with(b"!<arch>\n")
            || (data.len() > USTAR_MAGIC_OFFSET
                && data[USTAR_MAGIC_OFFSET..].starts_with(b"ustar"))
    }

    /// Whether `data` (at least 4 bytes long) starts with a known magic number.
    fn has_binary_signature(data: &[u8]) -> bool {
        // Typed as slices so signatures of different lengths share one table.
        const SIGNATURES: &[&[u8]] = &[
            // Image formats
            b"\x89PNG",
            &[0xFF, 0xD8, 0xFF], // JPEG (JFIF, Exif, ...)
            b"GIF8",
            b"BM", // BMP
            // Document formats
            b"%PDF",
            // Archive formats
            b"PK\x03\x04", // ZIP
            b"Rar!",
            &[0x1F, 0x8B], // GZIP
            b"BZh",        // BZIP2
            b"\xFD7zXZ\x00", // XZ
            // Executables and object files
            b"\x7FELF",
            b"MZ", // Windows PE
            // Compressed formats
            &[0x1F, 0x9D], // LZW compressed
            &[0x1F, 0xA0], // LZH compressed
        ];
        data.len() >= 4 && SIGNATURES.iter().any(|magic| data.starts_with(magic))
    }

    /// Whether `data` looks like UTF-16 text without a BOM: an even number of
    /// bytes in which more than half of the odd-indexed bytes are zero.
    fn looks_like_bomless_utf16(data: &[u8]) -> bool {
        if data.len() < 2 || data.len() % 2 != 0 {
            return false;
        }
        let units = data.len() / 2;
        let zero_odd_bytes = data.chunks_exact(2).filter(|unit| unit[1] == 0).count();
        2 * zero_odd_bytes > units
    }

    /// `part / whole` as a floating-point fraction; callers guarantee `whole > 0`.
    fn ratio(part: usize, whole: usize) -> f64 {
        part as f64 / whole as f64
    }
}
impl MetaPlugin for BinaryMetaPlugin {
    /// Returns a writer that discards everything written to it; the plugin
    /// receives its input through `update` instead.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Classifies the sampled bytes and returns `"true"` (binary) or `"false"` (text).
    fn finalize(&mut self) -> io::Result<String> {
        let verdict = match Self::is_binary(&self.buffer) {
            true => "true",
            false => "false",
        };
        Ok(verdict.to_string())
    }

    /// Appends bytes from `data` to the sample until `max_buffer_size` bytes
    /// have been collected; anything beyond that is ignored.
    fn update(&mut self, data: &[u8]) {
        let room = self.max_buffer_size.saturating_sub(self.buffer.len());
        self.buffer.extend(data.iter().take(room));
    }

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
impl CwdMetaPlugin {
pub fn new() -> CwdMetaPlugin {
CwdMetaPlugin {
meta_name: "cwd".to_string(),
}
}
}
impl MetaPlugin for CwdMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the current working directory (lossily converted to UTF-8), or
    /// `"unknown"` when it cannot be determined. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let cwd = env::current_dir()
            .map(|dir| dir.to_string_lossy().into_owned())
            .unwrap_or_else(|_| String::from("unknown"));
        Ok(cwd)
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the current user id.
#[derive(Debug, Clone, Default)]
pub struct UidMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl UidMetaPlugin {
    /// Builds the plugin with its metadata name set to `"uid"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("uid"),
        }
    }
}
impl MetaPlugin for UidMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the value of `get_current_uid()` rendered as a decimal string.
    fn finalize(&mut self) -> io::Result<String> {
        let uid = get_current_uid();
        Ok(uid.to_string())
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the current user name.
#[derive(Debug, Clone, Default)]
pub struct UserMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl UserMetaPlugin {
    /// Builds the plugin with its metadata name set to `"user"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("user"),
        }
    }
}
impl MetaPlugin for UserMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the current user name (lossily converted to UTF-8), or
    /// `"unknown"` when `get_current_username()` yields nothing. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let user = get_current_username()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| String::from("unknown"));
        Ok(user)
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the current group id.
#[derive(Debug, Clone, Default)]
pub struct GidMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl GidMetaPlugin {
    /// Builds the plugin with its metadata name set to `"gid"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("gid"),
        }
    }
}
impl MetaPlugin for GidMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the value of `get_current_gid()` rendered as a decimal string.
    fn finalize(&mut self) -> io::Result<String> {
        let gid = get_current_gid();
        Ok(gid.to_string())
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the current group name.
#[derive(Debug, Clone, Default)]
pub struct GroupMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl GroupMetaPlugin {
    /// Builds the plugin with its metadata name set to `"group"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("group"),
        }
    }
}
impl MetaPlugin for GroupMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the current group name (lossily converted to UTF-8), or
    /// `"unknown"` when `get_current_groupname()` yields nothing. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let group = get_current_groupname()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| String::from("unknown"));
        Ok(group)
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the `SHELL` environment variable.
#[derive(Debug, Clone, Default)]
pub struct ShellMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl ShellMetaPlugin {
    /// Builds the plugin with its metadata name set to `"shell"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("shell"),
        }
    }
}
impl MetaPlugin for ShellMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the value of the `SHELL` environment variable, or `"unknown"`
    /// when it is unset or not valid Unicode. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let shell = env::var("SHELL").unwrap_or_else(|_| String::from("unknown"));
        Ok(shell)
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin published under the `shell_pid` name.
#[derive(Debug, Clone, Default)]
pub struct ShellPidMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl ShellPidMetaPlugin {
    /// Builds the plugin with its metadata name set to `"shell_pid"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("shell_pid"),
        }
    }
}
impl MetaPlugin for ShellPidMetaPlugin {
fn create(&self) -> Result<Box<dyn Write>> {
Ok(Box::new(io::sink()))
}
fn finalize(&mut self) -> io::Result<String> {
match env::var("PPID") {
Ok(ppid) => Ok(ppid),
Err(_) => Ok(process::id().to_string()),
}
}
fn update(&mut self, _data: &[u8]) {
// No update needed
}
fn meta_name(&mut self) -> String {
self.meta_name.clone()
}
}
/// Metadata plugin whose `finalize` reports this process's own PID.
#[derive(Debug, Clone, Default)]
pub struct KeepPidMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl KeepPidMetaPlugin {
    /// Builds the plugin with its metadata name set to `"keep_pid"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("keep_pid"),
        }
    }
}
impl MetaPlugin for KeepPidMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the id of the running process as a decimal string.
    fn finalize(&mut self) -> io::Result<String> {
        let own_pid = process::id();
        Ok(own_pid.to_string())
    }

    /// Ignores the stream contents; the value does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports the machine's hostname.
#[derive(Debug, Clone, Default)]
pub struct HostnameMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl HostnameMetaPlugin {
    /// Builds the plugin with its metadata name set to `"hostname"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("hostname"),
        }
    }
}
impl MetaPlugin for HostnameMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the result of `gethostname()`, or `"unknown"` when that name
    /// cannot be converted to a `String`. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let host = gethostname()
            .into_string()
            .unwrap_or_else(|_| String::from("unknown"));
        Ok(host)
    }

    /// Ignores the stream contents; the hostname does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}
/// Metadata plugin whose `finalize` reports a reverse-DNS name for the local
/// IP address, falling back to the plain hostname.
#[derive(Debug, Clone, Default)]
pub struct FullHostnameMetaPlugin {
    /// Name returned by `meta_name()`.
    meta_name: String,
}

impl FullHostnameMetaPlugin {
    /// Builds the plugin with its metadata name set to `"full_hostname"`.
    pub fn new() -> Self {
        Self {
            meta_name: String::from("full_hostname"),
        }
    }
}
impl MetaPlugin for FullHostnameMetaPlugin {
    /// Returns a writer that discards everything written to it.
    fn create(&self) -> Result<Box<dyn Write>> {
        let discard = io::sink();
        Ok(Box::new(discard))
    }

    /// Reports the name obtained by a reverse DNS lookup of the local IP
    /// address. If the local IP cannot be determined or the lookup fails, the
    /// plain hostname is used; if that cannot be converted to a `String`
    /// either, the result is `"unknown"`. Always returns `Ok`.
    fn finalize(&mut self) -> io::Result<String> {
        let reverse_dns = local_ip()
            .ok()
            .and_then(|addr| lookup_addr(&addr).ok());

        let name = match reverse_dns {
            Some(fqdn) => fqdn,
            // Both failure paths share the same fallback chain.
            None => gethostname()
                .into_string()
                .unwrap_or_else(|_| String::from("unknown")),
        };
        Ok(name)
    }

    /// Ignores the stream contents; the hostname does not depend on them.
    fn update(&mut self, _data: &[u8]) {}

    /// Returns a copy of the plugin's metadata name.
    fn meta_name(&mut self) -> String {
        self.meta_name.to_owned()
    }
}