diff --git a/src/meta_plugin/text.rs b/src/meta_plugin/text.rs index 4144e05..c1a9575 100644 --- a/src/meta_plugin/text.rs +++ b/src/meta_plugin/text.rs @@ -214,17 +214,16 @@ impl TextMetaPlugin { (metadata, is_binary_result) } - - /// Helper method to output word and line counts - fn output_word_line_counts(&mut self) -> Vec { - let mut metadata = Vec::new(); - - // Process any remaining data in utf8_buffer + + /// Helper method to process the remaining UTF-8 buffer and finalize text statistics + fn process_remaining_utf8_buffer(&mut self) { if !self.utf8_buffer.is_empty() { self.count_text_stats(&[]); } - - // Handle the last line if tracking line lengths + } + + /// Helper method to handle the last line when tracking line lengths + fn handle_last_line_for_length_tracking(&mut self) { if self.track_line_lengths && self.current_line_length > 0 { // Update max line length for the last line if self.current_line_length > self.max_line_length { @@ -240,37 +239,115 @@ impl TextMetaPlugin { lengths.push(self.current_line_length); } } + } + + /// Helper method to output word count metadata + fn output_word_count_metadata(&self) -> Option { + if self.track_word_count { + crate::meta_plugin::process_metadata_outputs( + "text_word_count", + self.word_count.to_string(), + self.base.outputs() + ) + } else { + None + } + } + + /// Helper method to output line count metadata + fn output_line_count_metadata(&self) -> Option { + if self.track_line_count { + crate::meta_plugin::process_metadata_outputs( + "text_line_count", + self.line_count.to_string(), + self.base.outputs() + ) + } else { + None + } + } + + /// Helper method to output max line length metadata + fn output_max_line_length_metadata(&self) -> Option { + if self.output_line_max_len && self.line_count_for_stats > 0 { + crate::meta_plugin::process_metadata_outputs( + "text_line_max_len", + self.max_line_length.to_string(), + self.base.outputs() + ) + } else { + None + } + } + + /// Helper method to output mean line length metadata + fn output_mean_line_length_metadata(&self) -> Option { + if self.output_line_mean_len && self.line_count_for_stats > 0 { + let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64; + // Round to nearest integer + let mean_len_int = mean_len.round() as usize; + crate::meta_plugin::process_metadata_outputs( + "text_line_mean_len", + mean_len_int.to_string(), + self.base.outputs() + ) + } else { + None + } + } + + /// Helper method to output median line length metadata + fn output_median_line_length_metadata(&self) -> Option { + if self.output_line_median_len { + if let Some(lengths) = &self.line_lengths { + if !lengths.is_empty() { + let mut sorted_lengths = lengths.clone(); + sorted_lengths.sort(); + let median_len = if lengths.len() % 2 == 0 { + (sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0 + } else { + sorted_lengths[lengths.len() / 2] as f64 + }; + + return crate::meta_plugin::process_metadata_outputs( + "text_line_median_len", + median_len.to_string(), + self.base.outputs() + ); + } + } + } + None + } + + /// Helper method to output word and line counts + fn output_word_line_counts(&mut self) -> Vec { + let mut metadata = Vec::new(); + + // Process any remaining data in utf8_buffer + self.process_remaining_utf8_buffer(); + + // Handle the last line if tracking line lengths + self.handle_last_line_for_length_tracking(); // Debug: check if outputs are configured log::debug!("TEXT: Outputs: {:?}", self.base.outputs()); log::debug!("TEXT: Word count: {}, Line count: {}", self.word_count, self.line_count); // Output word count if tracked - if self.track_word_count { - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_word_count", - self.word_count.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding word count metadata: {:?}", meta_data); - metadata.push(meta_data); - } else { - log::debug!("TEXT: Word count output is disabled or not mapped"); - } + if let Some(meta_data) = self.output_word_count_metadata() { + log::debug!("TEXT: Adding word count metadata: {:?}", meta_data); + metadata.push(meta_data); + } else { + log::debug!("TEXT: Word count output is disabled or not mapped"); } // Output line count if tracked - if self.track_line_count { - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_count", - self.line_count.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding line count metadata: {:?}", meta_data); - metadata.push(meta_data); - } else { - log::debug!("TEXT: Line count output is disabled or not mapped"); - } + if let Some(meta_data) = self.output_line_count_metadata() { + log::debug!("TEXT: Adding line count metadata: {:?}", meta_data); + metadata.push(meta_data); + } else { + log::debug!("TEXT: Line count output is disabled or not mapped"); } // Output line length statistics if tracked @@ -279,58 +356,21 @@ impl TextMetaPlugin { self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len); // Calculate and output max line length if enabled - if self.output_line_max_len { - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_max_len", - self.max_line_length.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data); - metadata.push(meta_data); - } + if let Some(meta_data) = self.output_max_line_length_metadata() { + log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data); + metadata.push(meta_data); } // Calculate and output mean line length if enabled - if self.output_line_mean_len { - let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64; - // Round to nearest integer - let mean_len_int = mean_len.round() as usize; - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_mean_len", - mean_len_int.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data); - metadata.push(meta_data); - } + if let Some(meta_data) = self.output_mean_line_length_metadata() { + log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data); + metadata.push(meta_data); } // Calculate and output median line length if enabled - if self.output_line_median_len { - if let Some(lengths) = &self.line_lengths { - if !lengths.is_empty() { - let mut sorted_lengths = lengths.clone(); - sorted_lengths.sort(); - let median_len = if lengths.len() % 2 == 0 { - (sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0 - } else { - sorted_lengths[lengths.len() / 2] as f64 - }; - - if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( - "text_line_median_len", - median_len.to_string(), - self.base.outputs() - ) { - log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data); - metadata.push(meta_data); - } - } else { - log::debug!("TEXT: No line lengths recorded for median calculation"); - } - } else { - log::debug!("TEXT: Line lengths tracking is None for median calculation"); - } + if let Some(meta_data) = self.output_median_line_length_metadata() { + log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data); + metadata.push(meta_data); } } else { log::debug!("TEXT: Line lengths tracking is disabled or no lines processed");