diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index e3ec8d3..3b91dc0 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -19,9 +19,17 @@ pub struct TextMetaPlugin { track_word_count: bool, track_line_count: bool, track_line_lengths: bool, + // Flags for which line length statistics to output + output_line_max_len: bool, + output_line_mean_len: bool, + output_line_median_len: bool, // For tracking line lengths line_lengths: Option>, current_line_length: usize, + // For incremental calculation of max and mean + max_line_length: usize, + total_line_length: usize, + line_count_for_stats: usize, } impl TextMetaPlugin { @@ -82,8 +90,16 @@ impl TextMetaPlugin { track_word_count, track_line_count, track_line_lengths, + // Set output flags + output_line_max_len: track_line_max_len, + output_line_mean_len: track_line_mean_len, + output_line_median_len: track_line_median_len, line_lengths: if track_line_lengths { Some(Vec::new()) } else { None }, current_line_length: 0, + // Initialize incremental tracking for max and mean + max_line_length: 0, + total_line_length: 0, + line_count_for_stats: 0, } } @@ -146,9 +162,20 @@ impl TextMetaPlugin { if self.track_line_lengths { for ch in text.chars() { if ch == '\n' { + // Update max line length + if self.current_line_length > self.max_line_length { + self.max_line_length = self.current_line_length; + } + + // Update total for mean calculation + self.total_line_length += self.current_line_length; + self.line_count_for_stats += 1; + + // Only store individual lengths if median is needed if let Some(ref mut lengths) = self.line_lengths { lengths.push(self.current_line_length); } + self.current_line_length = 0; } else { self.current_line_length += 1; @@ -199,6 +226,16 @@ impl TextMetaPlugin { // Handle the last line if tracking line lengths if self.track_line_lengths && self.current_line_length > 0 { + // Update max line length for the last line + if self.current_line_length > self.max_line_length { + self.max_line_length = self.current_line_length; + } + + // Update total for mean calculation for the last line + self.total_line_length += self.current_line_length; + self.line_count_for_stats += 1; + + // Only store individual lengths if median is needed if let Some(ref mut lengths) = self.line_lengths { lengths.push(self.current_line_length); } @@ -237,65 +274,49 @@ impl TextMetaPlugin { } // Output line length statistics if tracked - if self.track_line_lengths { - if let Some(lengths) = &self.line_lengths { - log::debug!("TEXT: Line lengths: {:?} (count: {})", lengths, lengths.len()); - if !lengths.is_empty() { - // Calculate max, mean, median - let max_len = lengths.iter().max().unwrap(); - let sum: usize = lengths.iter().sum(); - let mean_len = sum as f64 / lengths.len() as f64; - - let mut sorted_lengths = lengths.clone(); - sorted_lengths.sort(); - let median_len = if lengths.len() % 2 == 0 { - (sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0 - } else { - sorted_lengths[lengths.len() / 2] as f64 - }; - - log::debug!("TEXT: Line stats - max: {}, mean: {}, median: {}", max_len, mean_len, median_len); - - // Check if each statistic should be output based on options - // Default to true if option is not present - let output_max = self.base.options.get("text_line_max_len") - .and_then(|v| v.as_bool()) - .unwrap_or(true); - let output_mean = self.base.options.get("text_line_mean_len") - .and_then(|v| v.as_bool()) - .unwrap_or(true); - let output_median = self.base.options.get("text_line_median_len") - .and_then(|v| v.as_bool()) - .unwrap_or(true); - - log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", output_max, output_mean, output_median); - - // Add each statistic if enabled - if output_max { - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_max_len", - max_len.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data); - metadata.push(meta_data); - } - } - - if output_mean { - // Round to nearest integer - let mean_len_int = mean_len.round() as usize; - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_mean_len", - mean_len_int.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data); - metadata.push(meta_data); - } - } - - if output_median { + if self.track_line_lengths && self.line_count_for_stats > 0 { + log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", + self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len); + + // Calculate and output max line length if enabled + if self.output_line_max_len { + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "text_line_max_len", + self.max_line_length.to_string(), + self.base.outputs() + ) { + log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data); + metadata.push(meta_data); + } + } + + // Calculate and output mean line length if enabled + if self.output_line_mean_len { + let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64; + // Round to nearest integer + let mean_len_int = mean_len.round() as usize; + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( + "text_line_mean_len", + mean_len_int.to_string(), + self.base.outputs() + ) { + log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data); + metadata.push(meta_data); + } + } + + // Calculate and output median line length if enabled + if self.output_line_median_len { + if let Some(lengths) = &self.line_lengths { + if !lengths.is_empty() { + let mut sorted_lengths = lengths.clone(); + sorted_lengths.sort(); + let median_len = if lengths.len() % 2 == 0 { + (sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0 + } else { + sorted_lengths[lengths.len() / 2] as f64 + }; + if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( "text_line_median_len", median_len.to_string(), @@ -304,15 +325,15 @@ impl TextMetaPlugin { log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data); metadata.push(meta_data); } + } else { + log::debug!("TEXT: No line lengths recorded for median calculation"); } } else { - log::debug!("TEXT: No line lengths recorded"); + log::debug!("TEXT: Line lengths tracking is None for median calculation"); } - } else { - log::debug!("TEXT: Line lengths tracking is None"); } } else { - log::debug!("TEXT: Line lengths tracking is disabled"); + log::debug!("TEXT: Line lengths tracking is disabled or no lines processed"); } metadata