fix: correct critical bugs and improve pipe streaming performance
Critical bug fixes:
- save_item now returns real Item from database, not a hardcoded fake
- AsyncDataService::save() reuses self.sync_service instead of creating redundant instance
- GenerateStatus trait signature mismatch fixed (CLI/API decoupling)
Performance improvements (pipe path untouched):
- CompressionEngine::open() returns Box<dyn Read + Send> enabling true streaming
- mode_get eliminates triple full-file read (was sampling then re-reading entire file)
- FilteringReader adds fast-path bypass when no filters, pre-allocates temp buffer
- text.rs meta plugin processes &[u8] slice directly, eliminates data.to_vec() clone
API correctness:
- Tag parse errors now return 400 instead of being silently discarded
- compute_diff uses similar crate (LCS-based) instead of naive positional comparison
Cleanup:
- Modernize string formatting to inline format arguments (e.g. format!("{x}")) across codebase
- Remove redundant DB query in get mode
- Derive Debug/ToSchema on public types
- Delete placeholder test files with no real assertions
- Extract parse_comma_tags utility function
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use crate::common::PIPESIZE;
|
||||
use crate::common::is_binary::is_binary;
|
||||
use crate::common::PIPESIZE;
|
||||
use crate::meta_plugin::{MetaPlugin, MetaPluginResponse, MetaPluginType};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -532,15 +532,14 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
}
|
||||
|
||||
let mut metadata = Vec::new();
|
||||
let processed_data = data.to_vec();
|
||||
|
||||
// If we haven't determined if content is binary yet, build buffer and check
|
||||
if self.is_binary_content.is_none() {
|
||||
let should_finalize = if let Some(ref mut buffer) = self.buffer {
|
||||
// Add processed data to our buffer up to max_buffer_size
|
||||
// Add data to our buffer up to max_buffer_size
|
||||
let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len());
|
||||
let bytes_to_take = std::cmp::min(processed_data.len(), remaining_capacity);
|
||||
buffer.extend_from_slice(&processed_data[..bytes_to_take]);
|
||||
let bytes_to_take = std::cmp::min(data.len(), remaining_capacity);
|
||||
buffer.extend_from_slice(&data[..bytes_to_take]);
|
||||
|
||||
// If we have enough data to make a binary determination, do it now
|
||||
let buffer_len = buffer.len();
|
||||
@@ -562,7 +561,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
}
|
||||
|
||||
// If it's text, count words and lines for this chunk
|
||||
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||
self.count_text_stats(&data[..bytes_to_take]);
|
||||
|
||||
// If we've reached our buffer limit, drop the buffer to save memory
|
||||
// But don't finalize yet - we need to keep counting words and lines
|
||||
@@ -572,7 +571,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
false // Never finalize here for text content
|
||||
} else {
|
||||
// Still building up buffer, count words and lines for this chunk
|
||||
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||
self.count_text_stats(&data[..bytes_to_take]);
|
||||
false
|
||||
}
|
||||
} else {
|
||||
@@ -587,7 +586,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
}
|
||||
} else if self.is_binary_content == Some(false) {
|
||||
// We've already determined it's text, just count words and lines
|
||||
self.count_text_stats(&processed_data);
|
||||
self.count_text_stats(data);
|
||||
}
|
||||
// If is_binary_content == Some(true), we should have already finalized, but just in case:
|
||||
else if self.is_binary_content == Some(true) {
|
||||
@@ -654,26 +653,43 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
&& let Some(buffer) = &self.buffer
|
||||
&& !buffer.is_empty()
|
||||
{
|
||||
// Build filter string from individual parameters
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({})", lines));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({})", lines));
|
||||
}
|
||||
let buffer = if head_bytes.is_some()
|
||||
|| head_lines.is_some()
|
||||
|| tail_bytes.is_some()
|
||||
|| tail_lines.is_some()
|
||||
{
|
||||
// Build filter string from individual parameters
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({bytes})"));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({lines})"));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({bytes})"));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({lines})"));
|
||||
}
|
||||
|
||||
// For now, just use the buffer as-is since filtering isn't implemented
|
||||
let processed_buffer = buffer.clone();
|
||||
// Apply filters if any are specified
|
||||
let filter_string = filter_parts.join(",");
|
||||
match crate::services::FilterService::new()
|
||||
.process_with_filter(buffer, Some(&filter_string))
|
||||
{
|
||||
Ok(filtered) => filtered,
|
||||
Err(e) => {
|
||||
log::warn!("Failed to apply filters: {e}");
|
||||
buffer.clone()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buffer.clone()
|
||||
};
|
||||
|
||||
// Clone the processed buffer data for binary detection
|
||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&processed_buffer);
|
||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&buffer);
|
||||
metadata.extend(binary_metadata);
|
||||
self.is_binary_content = Some(is_binary);
|
||||
|
||||
@@ -777,7 +793,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A reference to the `HashMap` of options.
|
||||
/// A reference to the `HashMap` of options.
|
||||
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.options()
|
||||
}
|
||||
@@ -786,7 +802,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A mutable reference to the `HashMap` of options.
|
||||
/// A mutable reference to the `HashMap` of options.
|
||||
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.options_mut()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user