feat: add line length statistics tracking flags

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-27 10:35:14 -03:00
parent 1ea4fc2180
commit e8f2c00416

View File

@@ -19,9 +19,17 @@ pub struct TextMetaPlugin {
track_word_count: bool, track_word_count: bool,
track_line_count: bool, track_line_count: bool,
track_line_lengths: bool, track_line_lengths: bool,
// Flags for which line length statistics to output
output_line_max_len: bool,
output_line_mean_len: bool,
output_line_median_len: bool,
// For tracking line lengths // For tracking line lengths
line_lengths: Option<Vec<usize>>, line_lengths: Option<Vec<usize>>,
current_line_length: usize, current_line_length: usize,
// For incremental calculation of max and mean
max_line_length: usize,
total_line_length: usize,
line_count_for_stats: usize,
} }
impl TextMetaPlugin { impl TextMetaPlugin {
@@ -82,8 +90,16 @@ impl TextMetaPlugin {
track_word_count, track_word_count,
track_line_count, track_line_count,
track_line_lengths, track_line_lengths,
// Set output flags
output_line_max_len: track_line_max_len,
output_line_mean_len: track_line_mean_len,
output_line_median_len: track_line_median_len,
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None }, line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
current_line_length: 0, current_line_length: 0,
// Initialize incremental tracking for max and mean
max_line_length: 0,
total_line_length: 0,
line_count_for_stats: 0,
} }
} }
@@ -146,9 +162,20 @@ impl TextMetaPlugin {
if self.track_line_lengths { if self.track_line_lengths {
for ch in text.chars() { for ch in text.chars() {
if ch == '\n' { if ch == '\n' {
// Update max line length
if self.current_line_length > self.max_line_length {
self.max_line_length = self.current_line_length;
}
// Update total for mean calculation
self.total_line_length += self.current_line_length;
self.line_count_for_stats += 1;
// Only store individual lengths if median is needed
if let Some(ref mut lengths) = self.line_lengths { if let Some(ref mut lengths) = self.line_lengths {
lengths.push(self.current_line_length); lengths.push(self.current_line_length);
} }
self.current_line_length = 0; self.current_line_length = 0;
} else { } else {
self.current_line_length += 1; self.current_line_length += 1;
@@ -199,6 +226,16 @@ impl TextMetaPlugin {
// Handle the last line if tracking line lengths // Handle the last line if tracking line lengths
if self.track_line_lengths && self.current_line_length > 0 { if self.track_line_lengths && self.current_line_length > 0 {
// Update max line length for the last line
if self.current_line_length > self.max_line_length {
self.max_line_length = self.current_line_length;
}
// Update total for mean calculation for the last line
self.total_line_length += self.current_line_length;
self.line_count_for_stats += 1;
// Only store individual lengths if median is needed
if let Some(ref mut lengths) = self.line_lengths { if let Some(ref mut lengths) = self.line_lengths {
lengths.push(self.current_line_length); lengths.push(self.current_line_length);
} }
@@ -237,44 +274,15 @@ impl TextMetaPlugin {
} }
// Output line length statistics if tracked // Output line length statistics if tracked
if self.track_line_lengths { if self.track_line_lengths && self.line_count_for_stats > 0 {
if let Some(lengths) = &self.line_lengths { log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}",
log::debug!("TEXT: Line lengths: {:?} (count: {})", lengths, lengths.len()); self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len);
if !lengths.is_empty() {
// Calculate max, mean, median
let max_len = lengths.iter().max().unwrap();
let sum: usize = lengths.iter().sum();
let mean_len = sum as f64 / lengths.len() as f64;
let mut sorted_lengths = lengths.clone(); // Calculate and output max line length if enabled
sorted_lengths.sort(); if self.output_line_max_len {
let median_len = if lengths.len() % 2 == 0 {
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
} else {
sorted_lengths[lengths.len() / 2] as f64
};
log::debug!("TEXT: Line stats - max: {}, mean: {}, median: {}", max_len, mean_len, median_len);
// Check if each statistic should be output based on options
// Default to true if option is not present
let output_max = self.base.options.get("text_line_max_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let output_mean = self.base.options.get("text_line_mean_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let output_median = self.base.options.get("text_line_median_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", output_max, output_mean, output_median);
// Add each statistic if enabled
if output_max {
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_max_len", "text_line_max_len",
max_len.to_string(), self.max_line_length.to_string(),
self.base.outputs() self.base.outputs()
) { ) {
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data); log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
@@ -282,7 +290,9 @@ impl TextMetaPlugin {
} }
} }
if output_mean { // Calculate and output mean line length if enabled
if self.output_line_mean_len {
let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64;
// Round to nearest integer // Round to nearest integer
let mean_len_int = mean_len.round() as usize; let mean_len_int = mean_len.round() as usize;
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
@@ -295,7 +305,18 @@ impl TextMetaPlugin {
} }
} }
if output_median { // Calculate and output median line length if enabled
if self.output_line_median_len {
if let Some(lengths) = &self.line_lengths {
if !lengths.is_empty() {
let mut sorted_lengths = lengths.clone();
sorted_lengths.sort();
let median_len = if lengths.len() % 2 == 0 {
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
} else {
sorted_lengths[lengths.len() / 2] as f64
};
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs( if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_median_len", "text_line_median_len",
median_len.to_string(), median_len.to_string(),
@@ -304,15 +325,15 @@ impl TextMetaPlugin {
log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data); log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data);
metadata.push(meta_data); metadata.push(meta_data);
} }
} else {
log::debug!("TEXT: No line lengths recorded for median calculation");
} }
} else { } else {
log::debug!("TEXT: No line lengths recorded"); log::debug!("TEXT: Line lengths tracking is None for median calculation");
}
} }
} else { } else {
log::debug!("TEXT: Line lengths tracking is None"); log::debug!("TEXT: Line lengths tracking is disabled or no lines processed");
}
} else {
log::debug!("TEXT: Line lengths tracking is disabled");
} }
metadata metadata