feat: add line length statistics tracking flags

Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
Andrew Phillips
2025-08-27 10:35:14 -03:00
parent 1ea4fc2180
commit e8f2c00416

View File

@@ -19,9 +19,17 @@ pub struct TextMetaPlugin {
track_word_count: bool,
track_line_count: bool,
track_line_lengths: bool,
// Flags for which line length statistics to output
output_line_max_len: bool,
output_line_mean_len: bool,
output_line_median_len: bool,
// For tracking line lengths
line_lengths: Option<Vec<usize>>,
current_line_length: usize,
// For incremental calculation of max and mean
max_line_length: usize,
total_line_length: usize,
line_count_for_stats: usize,
}
impl TextMetaPlugin {
@@ -82,8 +90,16 @@ impl TextMetaPlugin {
track_word_count,
track_line_count,
track_line_lengths,
// Set output flags
output_line_max_len: track_line_max_len,
output_line_mean_len: track_line_mean_len,
output_line_median_len: track_line_median_len,
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
current_line_length: 0,
// Initialize incremental tracking for max and mean
max_line_length: 0,
total_line_length: 0,
line_count_for_stats: 0,
}
}
@@ -146,9 +162,20 @@ impl TextMetaPlugin {
if self.track_line_lengths {
for ch in text.chars() {
if ch == '\n' {
// Update max line length
if self.current_line_length > self.max_line_length {
self.max_line_length = self.current_line_length;
}
// Update total for mean calculation
self.total_line_length += self.current_line_length;
self.line_count_for_stats += 1;
// Record the individual length (used for the median); the vector exists whenever track_line_lengths is set
if let Some(ref mut lengths) = self.line_lengths {
lengths.push(self.current_line_length);
}
self.current_line_length = 0;
} else {
self.current_line_length += 1;
@@ -199,6 +226,16 @@ impl TextMetaPlugin {
// Handle the last line if tracking line lengths
if self.track_line_lengths && self.current_line_length > 0 {
// Update max line length for the last line
if self.current_line_length > self.max_line_length {
self.max_line_length = self.current_line_length;
}
// Update total for mean calculation for the last line
self.total_line_length += self.current_line_length;
self.line_count_for_stats += 1;
// Record the last line's length (used for the median); the vector exists whenever track_line_lengths is set
if let Some(ref mut lengths) = self.line_lengths {
lengths.push(self.current_line_length);
}
@@ -237,65 +274,49 @@ impl TextMetaPlugin {
}
// Output line length statistics if tracked
if self.track_line_lengths {
if let Some(lengths) = &self.line_lengths {
log::debug!("TEXT: Line lengths: {:?} (count: {})", lengths, lengths.len());
if !lengths.is_empty() {
// Calculate max, mean, median
let max_len = lengths.iter().max().unwrap();
let sum: usize = lengths.iter().sum();
let mean_len = sum as f64 / lengths.len() as f64;
if self.track_line_lengths && self.line_count_for_stats > 0 {
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}",
self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len);
let mut sorted_lengths = lengths.clone();
sorted_lengths.sort();
let median_len = if lengths.len() % 2 == 0 {
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
} else {
sorted_lengths[lengths.len() / 2] as f64
};
// Calculate and output max line length if enabled
if self.output_line_max_len {
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_max_len",
self.max_line_length.to_string(),
self.base.outputs()
) {
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
metadata.push(meta_data);
}
}
log::debug!("TEXT: Line stats - max: {}, mean: {}, median: {}", max_len, mean_len, median_len);
// Calculate and output mean line length if enabled
if self.output_line_mean_len {
let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64;
// Round to nearest integer
let mean_len_int = mean_len.round() as usize;
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_mean_len",
mean_len_int.to_string(),
self.base.outputs()
) {
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
metadata.push(meta_data);
}
}
// Check if each statistic should be output based on options
// Default to true if option is not present
let output_max = self.base.options.get("text_line_max_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let output_mean = self.base.options.get("text_line_mean_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let output_median = self.base.options.get("text_line_median_len")
.and_then(|v| v.as_bool())
.unwrap_or(true);
// Calculate and output median line length if enabled
if self.output_line_median_len {
if let Some(lengths) = &self.line_lengths {
if !lengths.is_empty() {
let mut sorted_lengths = lengths.clone();
sorted_lengths.sort();
let median_len = if lengths.len() % 2 == 0 {
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
} else {
sorted_lengths[lengths.len() / 2] as f64
};
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", output_max, output_mean, output_median);
// Add each statistic if enabled
if output_max {
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_max_len",
max_len.to_string(),
self.base.outputs()
) {
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
metadata.push(meta_data);
}
}
if output_mean {
// Round to nearest integer
let mean_len_int = mean_len.round() as usize;
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_mean_len",
mean_len_int.to_string(),
self.base.outputs()
) {
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
metadata.push(meta_data);
}
}
if output_median {
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
"text_line_median_len",
median_len.to_string(),
@@ -304,15 +325,15 @@ impl TextMetaPlugin {
log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data);
metadata.push(meta_data);
}
} else {
log::debug!("TEXT: No line lengths recorded for median calculation");
}
} else {
log::debug!("TEXT: No line lengths recorded");
log::debug!("TEXT: Line lengths tracking is None for median calculation");
}
} else {
log::debug!("TEXT: Line lengths tracking is None");
}
} else {
log::debug!("TEXT: Line lengths tracking is disabled");
log::debug!("TEXT: Line lengths tracking is disabled or no lines processed");
}
metadata