Ugh
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
use crate::common::is_binary::is_binary;
|
||||
use crate::common::PIPESIZE;
|
||||
use crate::common::is_binary::is_binary;
|
||||
use crate::meta_plugin::{MetaPlugin, MetaPluginResponse, MetaPluginType};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -38,15 +38,21 @@ impl TextMetaPlugin {
|
||||
outputs: Option<std::collections::HashMap<String, serde_yaml::Value>>,
|
||||
) -> TextMetaPlugin {
|
||||
let mut base = crate::meta_plugin::BaseMetaPlugin::new();
|
||||
|
||||
|
||||
// Initialize with helper function
|
||||
base.initialize_plugin(
|
||||
&["text", "text_word_count", "text_line_count",
|
||||
"text_line_max_len", "text_line_mean_len", "text_line_median_len"],
|
||||
&[
|
||||
"text",
|
||||
"text_word_count",
|
||||
"text_line_count",
|
||||
"text_line_max_len",
|
||||
"text_line_mean_len",
|
||||
"text_line_median_len",
|
||||
],
|
||||
&options,
|
||||
&outputs,
|
||||
);
|
||||
|
||||
|
||||
// Set disabled outputs to null based on options
|
||||
let outputs_to_disable = vec![
|
||||
("text_word_count", "text_word_count"),
|
||||
@@ -55,7 +61,7 @@ impl TextMetaPlugin {
|
||||
("text_line_mean_len", "text_line_mean_len"),
|
||||
("text_line_median_len", "text_line_median_len"),
|
||||
];
|
||||
|
||||
|
||||
for (option_name, output_name) in outputs_to_disable {
|
||||
if let Some(value) = base.options.get(option_name) {
|
||||
// Handle both boolean false and string "false"
|
||||
@@ -65,53 +71,69 @@ impl TextMetaPlugin {
|
||||
_ => false,
|
||||
};
|
||||
if should_disable {
|
||||
base.outputs.insert(output_name.to_string(), serde_yaml::Value::Null);
|
||||
base.outputs
|
||||
.insert(output_name.to_string(), serde_yaml::Value::Null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Set default options if not provided
|
||||
let default_options = vec![
|
||||
("text_detect_size", serde_yaml::Value::Number(PIPESIZE.into())),
|
||||
(
|
||||
"text_detect_size",
|
||||
serde_yaml::Value::Number(PIPESIZE.into()),
|
||||
),
|
||||
("text_word_count", serde_yaml::Value::Bool(true)),
|
||||
("text_line_count", serde_yaml::Value::Bool(true)),
|
||||
("text_line_max_len", serde_yaml::Value::Bool(true)),
|
||||
("text_line_mean_len", serde_yaml::Value::Bool(true)),
|
||||
("text_line_median_len", serde_yaml::Value::Bool(false)),
|
||||
];
|
||||
|
||||
|
||||
for (key, value) in default_options {
|
||||
if !base.options.contains_key(key) {
|
||||
base.options.insert(key.to_string(), value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get text_detect_size (previously max_buffer_size)
|
||||
let max_buffer_size = base.options.get("text_detect_size")
|
||||
let max_buffer_size = base
|
||||
.options
|
||||
.get("text_detect_size")
|
||||
.or_else(|| base.options.get("max_buffer_size")) // Handle backward compatibility
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(PIPESIZE as u64) as usize;
|
||||
|
||||
|
||||
// Get which statistics to track
|
||||
let track_word_count = base.options.get("text_word_count")
|
||||
let track_word_count = base
|
||||
.options
|
||||
.get("text_word_count")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let track_line_count = base.options.get("text_line_count")
|
||||
let track_line_count = base
|
||||
.options
|
||||
.get("text_line_count")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let track_line_max_len = base.options.get("text_line_max_len")
|
||||
let track_line_max_len = base
|
||||
.options
|
||||
.get("text_line_max_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let track_line_mean_len = base.options.get("text_line_mean_len")
|
||||
let track_line_mean_len = base
|
||||
.options
|
||||
.get("text_line_mean_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let track_line_median_len = base.options.get("text_line_median_len")
|
||||
let track_line_median_len = base
|
||||
.options
|
||||
.get("text_line_median_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
|
||||
// Track line lengths if any of the line length options are enabled
|
||||
let track_line_lengths = track_line_max_len || track_line_mean_len || track_line_median_len;
|
||||
|
||||
|
||||
TextMetaPlugin {
|
||||
buffer: Some(Vec::new()),
|
||||
max_buffer_size,
|
||||
@@ -130,7 +152,11 @@ impl TextMetaPlugin {
|
||||
output_line_max_len: track_line_max_len,
|
||||
output_line_mean_len: track_line_mean_len,
|
||||
output_line_median_len: track_line_median_len,
|
||||
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
|
||||
line_lengths: if track_line_lengths {
|
||||
Some(Vec::new())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
current_line_length: 0,
|
||||
// Initialize incremental tracking for max and mean
|
||||
max_line_length: 0,
|
||||
@@ -138,8 +164,7 @@ impl TextMetaPlugin {
|
||||
line_count_for_stats: 0,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Count words and lines in a text chunk, handling block boundaries correctly.
|
||||
///
|
||||
/// Processes UTF-8 data, tracks word transitions, and updates line length statistics.
|
||||
@@ -152,7 +177,7 @@ impl TextMetaPlugin {
|
||||
if self.track_line_count {
|
||||
self.line_count += data.iter().filter(|&&b| b == b'\n').count();
|
||||
}
|
||||
|
||||
|
||||
// Handle UTF-8 character boundaries by combining with any buffered bytes
|
||||
let combined_data = if !self.utf8_buffer.is_empty() {
|
||||
let mut combined = self.utf8_buffer.clone();
|
||||
@@ -161,10 +186,10 @@ impl TextMetaPlugin {
|
||||
} else {
|
||||
data.to_vec()
|
||||
};
|
||||
|
||||
|
||||
// Clear the UTF-8 buffer
|
||||
self.utf8_buffer.clear();
|
||||
|
||||
|
||||
// Convert to string, handling potential UTF-8 boundaries
|
||||
let text = match std::str::from_utf8(&combined_data) {
|
||||
Ok(text) => text,
|
||||
@@ -172,7 +197,8 @@ impl TextMetaPlugin {
|
||||
// If we have incomplete UTF-8 at the end, buffer those bytes for next chunk
|
||||
let valid_up_to = e.valid_up_to();
|
||||
if valid_up_to < combined_data.len() {
|
||||
self.utf8_buffer.extend_from_slice(&combined_data[valid_up_to..]);
|
||||
self.utf8_buffer
|
||||
.extend_from_slice(&combined_data[valid_up_to..]);
|
||||
}
|
||||
match std::str::from_utf8(&combined_data[..valid_up_to]) {
|
||||
Ok(text) => text,
|
||||
@@ -180,12 +206,12 @@ impl TextMetaPlugin {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Count words if needed
|
||||
if self.track_word_count {
|
||||
for ch in text.chars() {
|
||||
let is_whitespace = ch.is_whitespace();
|
||||
|
||||
|
||||
if !self.in_word && !is_whitespace {
|
||||
// Transition from whitespace to word - start of new word
|
||||
self.word_count += 1;
|
||||
@@ -196,7 +222,7 @@ impl TextMetaPlugin {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Track line lengths if needed
|
||||
if self.track_line_lengths {
|
||||
for ch in text.chars() {
|
||||
@@ -205,16 +231,16 @@ impl TextMetaPlugin {
|
||||
if self.current_line_length > self.max_line_length {
|
||||
self.max_line_length = self.current_line_length;
|
||||
}
|
||||
|
||||
|
||||
// Update total for mean calculation
|
||||
self.total_line_length += self.current_line_length;
|
||||
self.line_count_for_stats += 1;
|
||||
|
||||
|
||||
// Only store individual lengths if median is needed
|
||||
if let Some(ref mut lengths) = self.line_lengths {
|
||||
lengths.push(self.current_line_length);
|
||||
}
|
||||
|
||||
|
||||
self.current_line_length = 0;
|
||||
} else {
|
||||
self.current_line_length += 1;
|
||||
@@ -222,7 +248,7 @@ impl TextMetaPlugin {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Helper method to perform binary detection and return appropriate metadata.
|
||||
///
|
||||
/// Uses the is_binary function to check the buffer and sets text-related outputs accordingly.
|
||||
@@ -234,23 +260,30 @@ impl TextMetaPlugin {
|
||||
/// # Returns
|
||||
///
|
||||
/// * `(Vec<MetaData>, bool)` - Metadata updates and whether content is binary.
|
||||
fn perform_binary_detection(&mut self, buffer: &[u8]) -> (Vec<crate::meta_plugin::MetaData>, bool) {
|
||||
fn perform_binary_detection(
|
||||
&mut self,
|
||||
buffer: &[u8],
|
||||
) -> (Vec<crate::meta_plugin::MetaData>, bool) {
|
||||
let mut metadata = Vec::new();
|
||||
let is_binary_result = is_binary(buffer);
|
||||
self.is_binary_content = Some(is_binary_result);
|
||||
|
||||
|
||||
// Output text status
|
||||
let text_value = if is_binary_result { "false".to_string() } else { "true".to_string() };
|
||||
|
||||
let text_value = if is_binary_result {
|
||||
"false".to_string()
|
||||
} else {
|
||||
"true".to_string()
|
||||
};
|
||||
|
||||
// Use process_metadata_outputs to handle output mapping
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text",
|
||||
serde_yaml::Value::String(text_value),
|
||||
self.base.outputs()
|
||||
"text",
|
||||
serde_yaml::Value::String(text_value),
|
||||
self.base.outputs(),
|
||||
) {
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
|
||||
|
||||
// If content is binary, set all text-related outputs to None
|
||||
if is_binary_result {
|
||||
let text_outputs = vec![
|
||||
@@ -262,15 +295,15 @@ impl TextMetaPlugin {
|
||||
];
|
||||
for output_name in text_outputs {
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
output_name,
|
||||
serde_yaml::Value::Null,
|
||||
self.base.outputs()
|
||||
output_name,
|
||||
serde_yaml::Value::Null,
|
||||
self.base.outputs(),
|
||||
) {
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
(metadata, is_binary_result)
|
||||
}
|
||||
|
||||
@@ -292,11 +325,11 @@ impl TextMetaPlugin {
|
||||
if self.current_line_length > self.max_line_length {
|
||||
self.max_line_length = self.current_line_length;
|
||||
}
|
||||
|
||||
|
||||
// Update total for mean calculation for the last line
|
||||
self.total_line_length += self.current_line_length;
|
||||
self.line_count_for_stats += 1;
|
||||
|
||||
|
||||
// Only store individual lengths if median is needed
|
||||
if let Some(ref mut lengths) = self.line_lengths {
|
||||
lengths.push(self.current_line_length);
|
||||
@@ -312,9 +345,9 @@ impl TextMetaPlugin {
|
||||
fn output_word_count_metadata(&self) -> Option<crate::meta_plugin::MetaData> {
|
||||
if self.track_word_count {
|
||||
crate::meta_plugin::process_metadata_outputs(
|
||||
"text_word_count",
|
||||
serde_yaml::Value::String(self.word_count.to_string()),
|
||||
self.base.outputs()
|
||||
"text_word_count",
|
||||
serde_yaml::Value::String(self.word_count.to_string()),
|
||||
self.base.outputs(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
@@ -329,9 +362,9 @@ impl TextMetaPlugin {
|
||||
fn output_line_count_metadata(&self) -> Option<crate::meta_plugin::MetaData> {
|
||||
if self.track_line_count {
|
||||
crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_count",
|
||||
serde_yaml::Value::String(self.line_count.to_string()),
|
||||
self.base.outputs()
|
||||
"text_line_count",
|
||||
serde_yaml::Value::String(self.line_count.to_string()),
|
||||
self.base.outputs(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
@@ -346,9 +379,9 @@ impl TextMetaPlugin {
|
||||
fn output_max_line_length_metadata(&self) -> Option<crate::meta_plugin::MetaData> {
|
||||
if self.output_line_max_len && self.line_count_for_stats > 0 {
|
||||
crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_max_len",
|
||||
serde_yaml::Value::String(self.max_line_length.to_string()),
|
||||
self.base.outputs()
|
||||
"text_line_max_len",
|
||||
serde_yaml::Value::String(self.max_line_length.to_string()),
|
||||
self.base.outputs(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
@@ -368,9 +401,9 @@ impl TextMetaPlugin {
|
||||
// Round to nearest integer
|
||||
let mean_len_int = mean_len.round() as usize;
|
||||
crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_mean_len",
|
||||
serde_yaml::Value::String(mean_len_int.to_string()),
|
||||
self.base.outputs()
|
||||
"text_line_mean_len",
|
||||
serde_yaml::Value::String(mean_len_int.to_string()),
|
||||
self.base.outputs(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
@@ -386,26 +419,27 @@ impl TextMetaPlugin {
|
||||
/// * `Option<MetaData>` - Metadata entry if enabled and data exists.
|
||||
fn output_median_line_length_metadata(&self) -> Option<crate::meta_plugin::MetaData> {
|
||||
if self.output_line_median_len
|
||||
&& let Some(lengths) = &self.line_lengths {
|
||||
if !lengths.is_empty() {
|
||||
let mut sorted_lengths = lengths.clone();
|
||||
sorted_lengths.sort();
|
||||
let median_len = if lengths.len() % 2 == 0 {
|
||||
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
|
||||
} else {
|
||||
sorted_lengths[lengths.len() / 2] as f64
|
||||
};
|
||||
|
||||
return crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_median_len",
|
||||
serde_yaml::Value::String(median_len.to_string()),
|
||||
self.base.outputs()
|
||||
);
|
||||
}
|
||||
&& let Some(lengths) = &self.line_lengths
|
||||
&& !lengths.is_empty()
|
||||
{
|
||||
let mut sorted_lengths = lengths.clone();
|
||||
sorted_lengths.sort();
|
||||
let median_len = if lengths.len() % 2 == 0 {
|
||||
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64
|
||||
/ 2.0
|
||||
} else {
|
||||
sorted_lengths[lengths.len() / 2] as f64
|
||||
};
|
||||
|
||||
return crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_median_len",
|
||||
serde_yaml::Value::String(median_len.to_string()),
|
||||
self.base.outputs(),
|
||||
);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
|
||||
/// Helper method to output word and line counts.
|
||||
///
|
||||
/// Finalizes pending data and collects all enabled text statistics metadata.
|
||||
@@ -440,7 +474,10 @@ impl TextMetaPlugin {
|
||||
let line_stats_outputs = vec![
|
||||
(self.output_max_line_length_metadata(), "max line length"),
|
||||
(self.output_mean_line_length_metadata(), "mean line length"),
|
||||
(self.output_median_line_length_metadata(), "median line length"),
|
||||
(
|
||||
self.output_median_line_length_metadata(),
|
||||
"median line length",
|
||||
),
|
||||
];
|
||||
|
||||
for (output, _) in line_stats_outputs {
|
||||
@@ -463,7 +500,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn is_finalized(&self) -> bool {
|
||||
self.is_finalized
|
||||
}
|
||||
|
||||
|
||||
/// Sets the finalized state of the plugin.
|
||||
///
|
||||
/// # Arguments
|
||||
@@ -473,7 +510,6 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
self.is_finalized = finalized;
|
||||
}
|
||||
|
||||
|
||||
/// Updates the plugin with new data chunk.
|
||||
///
|
||||
/// Accumulates data for binary detection (if pending) or text statistics.
|
||||
@@ -497,7 +533,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
|
||||
let mut metadata = Vec::new();
|
||||
let processed_data = data.to_vec();
|
||||
|
||||
|
||||
// If we haven't determined if content is binary yet, build buffer and check
|
||||
if self.is_binary_content.is_none() {
|
||||
let should_finalize = if let Some(ref mut buffer) = self.buffer {
|
||||
@@ -505,7 +541,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
let remaining_capacity = self.max_buffer_size.saturating_sub(buffer.len());
|
||||
let bytes_to_take = std::cmp::min(processed_data.len(), remaining_capacity);
|
||||
buffer.extend_from_slice(&processed_data[..bytes_to_take]);
|
||||
|
||||
|
||||
// If we have enough data to make a binary determination, do it now
|
||||
let buffer_len = buffer.len();
|
||||
if buffer_len >= std::cmp::min(1024, self.max_buffer_size) {
|
||||
@@ -514,7 +550,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&buffer_clone);
|
||||
metadata.extend(binary_metadata);
|
||||
self.is_binary_content = Some(is_binary);
|
||||
|
||||
|
||||
// If it's binary, we're done with this plugin
|
||||
if is_binary {
|
||||
self.buffer = None; // Drop the buffer
|
||||
@@ -524,16 +560,16 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
// If it's text, count words and lines for this chunk
|
||||
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||
|
||||
|
||||
// If we've reached our buffer limit, drop the buffer to save memory
|
||||
// But don't finalize yet - we need to keep counting words and lines
|
||||
if buffer_len >= self.max_buffer_size {
|
||||
self.buffer = None; // Drop the buffer
|
||||
}
|
||||
false // Never finalize here for text content
|
||||
false // Never finalize here for text content
|
||||
} else {
|
||||
// Still building up buffer, count words and lines for this chunk
|
||||
self.count_text_stats(&processed_data[..bytes_to_take]);
|
||||
@@ -542,7 +578,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
|
||||
if should_finalize {
|
||||
return MetaPluginResponse {
|
||||
metadata,
|
||||
@@ -584,97 +620,108 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
let mut metadata = Vec::new();
|
||||
|
||||
|
||||
// Check if we have head/tail options
|
||||
let head_bytes = self.base.options.get("head_bytes")
|
||||
let head_bytes = self
|
||||
.base
|
||||
.options
|
||||
.get("head_bytes")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize);
|
||||
let head_lines = self.base.options.get("head_lines")
|
||||
let head_lines = self
|
||||
.base
|
||||
.options
|
||||
.get("head_lines")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize);
|
||||
let tail_bytes = self.base.options.get("tail_bytes")
|
||||
let tail_bytes = self
|
||||
.base
|
||||
.options
|
||||
.get("tail_bytes")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize);
|
||||
let tail_lines = self.base.options.get("tail_lines")
|
||||
let tail_lines = self
|
||||
.base
|
||||
.options
|
||||
.get("tail_lines")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|v| v as usize);
|
||||
|
||||
|
||||
// If we haven't determined binary status yet, do it now with whatever we have
|
||||
if self.is_binary_content.is_none() {
|
||||
if let Some(buffer) = &self.buffer {
|
||||
if !buffer.is_empty() {
|
||||
// Build filter string from individual parameters
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({})", lines));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({})", lines));
|
||||
}
|
||||
|
||||
// For now, just use the buffer as-is since filtering isn't implemented
|
||||
let processed_buffer = buffer.clone();
|
||||
|
||||
// Clone the processed buffer data for binary detection
|
||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&processed_buffer);
|
||||
metadata.extend(binary_metadata);
|
||||
self.is_binary_content = Some(is_binary);
|
||||
|
||||
// If it's binary, we're done
|
||||
if is_binary {
|
||||
self.buffer = None; // Drop the buffer
|
||||
self.is_finalized = true;
|
||||
// Set all text-related outputs to None since content is binary
|
||||
// Only include outputs that are enabled in the configuration
|
||||
let text_outputs = vec![
|
||||
("text_word_count", self.track_word_count),
|
||||
("text_line_count", self.track_line_count),
|
||||
("text_line_max_len", self.output_line_max_len),
|
||||
("text_line_mean_len", self.output_line_mean_len),
|
||||
("text_line_median_len", self.output_line_median_len),
|
||||
];
|
||||
|
||||
for (output_name, is_enabled) in text_outputs {
|
||||
if is_enabled {
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
output_name,
|
||||
serde_yaml::Value::Null,
|
||||
self.base.outputs()
|
||||
) {
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
return MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized: true,
|
||||
};
|
||||
if self.is_binary_content.is_none()
|
||||
&& let Some(buffer) = &self.buffer
|
||||
&& !buffer.is_empty()
|
||||
{
|
||||
// Build filter string from individual parameters
|
||||
let mut filter_parts = Vec::new();
|
||||
if let Some(bytes) = head_bytes {
|
||||
filter_parts.push(format!("head_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = head_lines {
|
||||
filter_parts.push(format!("head_lines({})", lines));
|
||||
}
|
||||
if let Some(bytes) = tail_bytes {
|
||||
filter_parts.push(format!("tail_bytes({})", bytes));
|
||||
}
|
||||
if let Some(lines) = tail_lines {
|
||||
filter_parts.push(format!("tail_lines({})", lines));
|
||||
}
|
||||
|
||||
// For now, just use the buffer as-is since filtering isn't implemented
|
||||
let processed_buffer = buffer.clone();
|
||||
|
||||
// Clone the processed buffer data for binary detection
|
||||
let (binary_metadata, is_binary) = self.perform_binary_detection(&processed_buffer);
|
||||
metadata.extend(binary_metadata);
|
||||
self.is_binary_content = Some(is_binary);
|
||||
|
||||
// If it's binary, we're done
|
||||
if is_binary {
|
||||
self.buffer = None; // Drop the buffer
|
||||
self.is_finalized = true;
|
||||
// Set all text-related outputs to None since content is binary
|
||||
// Only include outputs that are enabled in the configuration
|
||||
let text_outputs = vec![
|
||||
("text_word_count", self.track_word_count),
|
||||
("text_line_count", self.track_line_count),
|
||||
("text_line_max_len", self.output_line_max_len),
|
||||
("text_line_mean_len", self.output_line_mean_len),
|
||||
("text_line_median_len", self.output_line_median_len),
|
||||
];
|
||||
|
||||
for (output_name, is_enabled) in text_outputs {
|
||||
if is_enabled
|
||||
&& let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
output_name,
|
||||
serde_yaml::Value::Null,
|
||||
self.base.outputs(),
|
||||
)
|
||||
{
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
return MetaPluginResponse {
|
||||
metadata,
|
||||
is_finalized: true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If content is text, output word and line counts
|
||||
if self.is_binary_content == Some(false) {
|
||||
let word_line_metadata = self.output_word_line_counts();
|
||||
metadata.extend(word_line_metadata);
|
||||
}
|
||||
|
||||
|
||||
// Only include outputs that are enabled in the configuration
|
||||
// Disabled outputs should not be emitted at all (not even as null)
|
||||
// So we don't need to add anything for disabled outputs
|
||||
|
||||
|
||||
// Drop the buffer since we're done with it
|
||||
self.buffer = None;
|
||||
|
||||
|
||||
// Mark as finalized
|
||||
self.is_finalized = true;
|
||||
MetaPluginResponse {
|
||||
@@ -691,7 +738,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn meta_type(&self) -> MetaPluginType {
|
||||
MetaPluginType::Text
|
||||
}
|
||||
|
||||
|
||||
/// Returns a reference to the outputs mapping.
|
||||
///
|
||||
/// # Returns
|
||||
@@ -700,7 +747,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn outputs(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.outputs()
|
||||
}
|
||||
|
||||
|
||||
/// Returns a mutable reference to the outputs mapping.
|
||||
///
|
||||
/// # Returns
|
||||
@@ -709,7 +756,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn outputs_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.outputs_mut()
|
||||
}
|
||||
|
||||
|
||||
/// Returns the default output names for this plugin.
|
||||
///
|
||||
/// # Returns
|
||||
@@ -717,15 +764,15 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
/// Vector of default output field names.
|
||||
fn default_outputs(&self) -> Vec<String> {
|
||||
vec![
|
||||
"text".to_string(),
|
||||
"text_word_count".to_string(),
|
||||
"text".to_string(),
|
||||
"text_word_count".to_string(),
|
||||
"text_line_count".to_string(),
|
||||
"text_line_max_len".to_string(),
|
||||
"text_line_mean_len".to_string(),
|
||||
"text_line_median_len".to_string()
|
||||
"text_line_median_len".to_string(),
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
/// Returns a reference to the options mapping.
|
||||
///
|
||||
/// # Returns
|
||||
@@ -734,7 +781,7 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn options(&self) -> &std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.options()
|
||||
}
|
||||
|
||||
|
||||
/// Returns a mutable reference to the options mapping.
|
||||
///
|
||||
/// # Returns
|
||||
@@ -743,7 +790,6 @@ impl MetaPlugin for TextMetaPlugin {
|
||||
fn options_mut(&mut self) -> &mut std::collections::HashMap<String, serde_yaml::Value> {
|
||||
self.base.options_mut()
|
||||
}
|
||||
|
||||
}
|
||||
use crate::meta_plugin::register_meta_plugin;
|
||||
|
||||
@@ -753,4 +799,4 @@ fn register_text_plugin() {
|
||||
register_meta_plugin(MetaPluginType::Text, |options, outputs| {
|
||||
Box::new(TextMetaPlugin::new(options, outputs))
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user