// keep/src/common/mod.rs

/// Detects if data is binary or text based on signatures and printable ratios.
pub mod is_binary;

// TODO(review): document what `status` provides; its contents are not visible from this file.
pub mod status;

/// Plugin schema types and discovery functions.
pub mod schema;

/// Size in bytes (8 KiB) of the fixed buffers used for I/O operations.
pub const PIPESIZE: usize = 8 * 1024;

/// Streams `reader` to EOF, handing every chunk that is read to `f`.
///
/// All reads go through a single stack buffer of `PIPESIZE` bytes, so memory
/// use stays bounded regardless of how much data the reader yields. Each slice
/// passed to `f` is non-empty, at most `PIPESIZE` bytes long, and only valid
/// for the duration of that call.
///
/// # Errors
///
/// Returns the first error produced by `reader` or by `f`; nothing further is
/// read after an error. Reads that fail with
/// [`std::io::ErrorKind::Interrupted`] are retried instead of being reported,
/// as the [`std::io::Read`] contract recommends.
pub fn stream_copy<R: std::io::Read + ?Sized>(
    reader: &mut R,
    mut f: impl FnMut(&[u8]) -> std::io::Result<()>,
) -> std::io::Result<()> {
    let mut buffer = [0u8; PIPESIZE];
    loop {
        let n = match reader.read(&mut buffer) {
            // A zero-length read signals EOF: everything has been delivered.
            Ok(0) => return Ok(()),
            Ok(n) => n,
            // An interrupted read transferred no data; it is safe to retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        f(&buffer[..n])?;
    }
}

/// Reads a bounded byte range from a reader.
///
/// Skips `offset` bytes from the reader, then reads up to `length` bytes
/// (or all remaining if `length` is 0). Both phases go through one
/// `PIPESIZE`-byte stack buffer. If the reader reaches EOF early, the skip
/// phase stops and the bytes collected so far (possibly none) are returned.
///
/// # Arguments
///
/// * `reader` - The source reader positioned at the start.
/// * `offset` - Number of bytes to skip before reading (capped at `content_len`).
/// * `length` - Maximum bytes to read (0 = read all remaining).
/// * `content_len` - Total content size (used to cap skip/read amounts).
///
/// # Returns
///
/// A `Vec<u8>` containing the requested byte range. It is empty when
/// `offset >= content_len`.
///
/// # Errors
///
/// Returns the first error reported by `reader`. Reads that fail with
/// [`std::io::ErrorKind::Interrupted`] are retried instead of being reported,
/// as the [`std::io::Read`] contract recommends.
pub fn read_with_bounds<R: std::io::Read + ?Sized>(
    reader: &mut R,
    offset: u64,
    length: u64,
    content_len: u64,
) -> std::io::Result<Vec<u8>> {
    // Performs one `read`, transparently retrying interrupted calls (which
    // transfer no data) so a stray signal does not abort the whole operation.
    fn read_retrying<T: std::io::Read + ?Sized>(
        src: &mut T,
        dst: &mut [u8],
    ) -> std::io::Result<usize> {
        loop {
            match src.read(dst) {
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                outcome => return outcome,
            }
        }
    }

    let mut buf = [0u8; PIPESIZE];

    // Skip phase: discard up to `offset` bytes, never more than the content holds.
    let mut to_skip = std::cmp::min(offset, content_len);
    while to_skip > 0 {
        // Bounded by `buf.len()`, so the narrowing cast cannot truncate.
        let want = std::cmp::min(to_skip, buf.len() as u64) as usize;
        let n = read_retrying(&mut *reader, &mut buf[..want])?;
        if n == 0 {
            // Early EOF: nothing left to skip (or to read afterwards).
            break;
        }
        to_skip -= n as u64;
    }

    // Read phase: `length == 0` means "everything after `offset`".
    let available = content_len.saturating_sub(offset);
    let max_bytes = if length > 0 {
        std::cmp::min(length, available)
    } else {
        available
    };

    // Cap the up-front allocation; the vector grows on demand beyond 64 KiB.
    let mut result = Vec::with_capacity(std::cmp::min(max_bytes, 64 * 1024) as usize);
    let mut left = max_bytes;
    while left > 0 {
        let want = std::cmp::min(left, buf.len() as u64) as usize;
        let n = read_retrying(&mut *reader, &mut buf[..want])?;
        if n == 0 {
            break;
        }
        result.extend_from_slice(&buf[..n]);
        left -= n as u64;
    }
    Ok(result)
}

/// Makes a timestamp string safe to embed in a filename.
///
/// Every `:` becomes `-`; all other characters pass through untouched
/// (e.g., `2026-03-17T12:00:00Z` → `2026-03-17T12-00-00Z`).
pub fn sanitize_ts_string(ts: &str) -> String {
    ts.chars()
        .map(|ch| match ch {
            ':' => '-',
            other => other,
        })
        .collect()
}