feat: add line length statistics tracking flags
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
@@ -19,9 +19,17 @@ pub struct TextMetaPlugin {
|
||||
track_word_count: bool,
|
||||
track_line_count: bool,
|
||||
track_line_lengths: bool,
|
||||
// Flags for which line length statistics to output
|
||||
output_line_max_len: bool,
|
||||
output_line_mean_len: bool,
|
||||
output_line_median_len: bool,
|
||||
// For tracking line lengths
|
||||
line_lengths: Option<Vec<usize>>,
|
||||
current_line_length: usize,
|
||||
// For incremental calculation of max and mean
|
||||
max_line_length: usize,
|
||||
total_line_length: usize,
|
||||
line_count_for_stats: usize,
|
||||
}
|
||||
|
||||
impl TextMetaPlugin {
|
||||
@@ -82,8 +90,16 @@ impl TextMetaPlugin {
|
||||
track_word_count,
|
||||
track_line_count,
|
||||
track_line_lengths,
|
||||
// Set output flags
|
||||
output_line_max_len: track_line_max_len,
|
||||
output_line_mean_len: track_line_mean_len,
|
||||
output_line_median_len: track_line_median_len,
|
||||
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
|
||||
current_line_length: 0,
|
||||
// Initialize incremental tracking for max and mean
|
||||
max_line_length: 0,
|
||||
total_line_length: 0,
|
||||
line_count_for_stats: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,9 +162,20 @@ impl TextMetaPlugin {
|
||||
if self.track_line_lengths {
|
||||
for ch in text.chars() {
|
||||
if ch == '\n' {
|
||||
// Update max line length
|
||||
if self.current_line_length > self.max_line_length {
|
||||
self.max_line_length = self.current_line_length;
|
||||
}
|
||||
|
||||
// Update total for mean calculation
|
||||
self.total_line_length += self.current_line_length;
|
||||
self.line_count_for_stats += 1;
|
||||
|
||||
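// Store individual lengths whenever the buffer is allocated (used for the median; note it is allocated whenever track_line_lengths is set, not only when median output is enabled)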
|
||||
if let Some(ref mut lengths) = self.line_lengths {
|
||||
lengths.push(self.current_line_length);
|
||||
}
|
||||
|
||||
self.current_line_length = 0;
|
||||
} else {
|
||||
self.current_line_length += 1;
|
||||
@@ -199,6 +226,16 @@ impl TextMetaPlugin {
|
||||
|
||||
// Handle the last line if tracking line lengths
|
||||
if self.track_line_lengths && self.current_line_length > 0 {
|
||||
// Update max line length for the last line
|
||||
if self.current_line_length > self.max_line_length {
|
||||
self.max_line_length = self.current_line_length;
|
||||
}
|
||||
|
||||
// Update total for mean calculation for the last line
|
||||
self.total_line_length += self.current_line_length;
|
||||
self.line_count_for_stats += 1;
|
||||
|
||||
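// Store the last line's length whenever the buffer is allocated (used for the median; note it is allocated whenever track_line_lengths is set, not only when median output is enabled)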
|
||||
if let Some(ref mut lengths) = self.line_lengths {
|
||||
lengths.push(self.current_line_length);
|
||||
}
|
||||
@@ -237,65 +274,49 @@ impl TextMetaPlugin {
|
||||
}
|
||||
|
||||
// Output line length statistics if tracked
|
||||
if self.track_line_lengths {
|
||||
if let Some(lengths) = &self.line_lengths {
|
||||
log::debug!("TEXT: Line lengths: {:?} (count: {})", lengths, lengths.len());
|
||||
if !lengths.is_empty() {
|
||||
// Calculate max, mean, median
|
||||
let max_len = lengths.iter().max().unwrap();
|
||||
let sum: usize = lengths.iter().sum();
|
||||
let mean_len = sum as f64 / lengths.len() as f64;
|
||||
|
||||
let mut sorted_lengths = lengths.clone();
|
||||
sorted_lengths.sort();
|
||||
let median_len = if lengths.len() % 2 == 0 {
|
||||
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
|
||||
} else {
|
||||
sorted_lengths[lengths.len() / 2] as f64
|
||||
};
|
||||
|
||||
log::debug!("TEXT: Line stats - max: {}, mean: {}, median: {}", max_len, mean_len, median_len);
|
||||
|
||||
// Check if each statistic should be output based on options
|
||||
// Default to true if option is not present
|
||||
let output_max = self.base.options.get("text_line_max_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let output_mean = self.base.options.get("text_line_mean_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
let output_median = self.base.options.get("text_line_median_len")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
|
||||
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", output_max, output_mean, output_median);
|
||||
|
||||
// Add each statistic if enabled
|
||||
if output_max {
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_max_len",
|
||||
max_len.to_string(),
|
||||
self.base.outputs()
|
||||
) {
|
||||
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
|
||||
if output_mean {
|
||||
// Round to nearest integer
|
||||
let mean_len_int = mean_len.round() as usize;
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_mean_len",
|
||||
mean_len_int.to_string(),
|
||||
self.base.outputs()
|
||||
) {
|
||||
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
|
||||
if output_median {
|
||||
if self.track_line_lengths && self.line_count_for_stats > 0 {
|
||||
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}",
|
||||
self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len);
|
||||
|
||||
// Calculate and output max line length if enabled
|
||||
if self.output_line_max_len {
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_max_len",
|
||||
self.max_line_length.to_string(),
|
||||
self.base.outputs()
|
||||
) {
|
||||
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate and output mean line length if enabled
|
||||
if self.output_line_mean_len {
|
||||
let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64;
|
||||
// Round to nearest integer
|
||||
let mean_len_int = mean_len.round() as usize;
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_mean_len",
|
||||
mean_len_int.to_string(),
|
||||
self.base.outputs()
|
||||
) {
|
||||
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate and output median line length if enabled
|
||||
if self.output_line_median_len {
|
||||
if let Some(lengths) = &self.line_lengths {
|
||||
if !lengths.is_empty() {
|
||||
let mut sorted_lengths = lengths.clone();
|
||||
sorted_lengths.sort();
|
||||
let median_len = if lengths.len() % 2 == 0 {
|
||||
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
|
||||
} else {
|
||||
sorted_lengths[lengths.len() / 2] as f64
|
||||
};
|
||||
|
||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||
"text_line_median_len",
|
||||
median_len.to_string(),
|
||||
@@ -304,15 +325,15 @@ impl TextMetaPlugin {
|
||||
log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data);
|
||||
metadata.push(meta_data);
|
||||
}
|
||||
} else {
|
||||
log::debug!("TEXT: No line lengths recorded for median calculation");
|
||||
}
|
||||
} else {
|
||||
log::debug!("TEXT: No line lengths recorded");
|
||||
log::debug!("TEXT: Line lengths tracking is None for median calculation");
|
||||
}
|
||||
} else {
|
||||
log::debug!("TEXT: Line lengths tracking is None");
|
||||
}
|
||||
} else {
|
||||
log::debug!("TEXT: Line lengths tracking is disabled");
|
||||
log::debug!("TEXT: Line lengths tracking is disabled or no lines processed");
|
||||
}
|
||||
|
||||
metadata
|
||||
|
||||
Reference in New Issue
Block a user