feat: add line length statistics tracking flags
Co-authored-by: aider (openai/andrew/openrouter/deepseek/deepseek-chat-v3.1) <aider@aider.chat>
This commit is contained in:
@@ -19,9 +19,17 @@ pub struct TextMetaPlugin {
|
|||||||
track_word_count: bool,
|
track_word_count: bool,
|
||||||
track_line_count: bool,
|
track_line_count: bool,
|
||||||
track_line_lengths: bool,
|
track_line_lengths: bool,
|
||||||
|
// Flags for which line length statistics to output
|
||||||
|
output_line_max_len: bool,
|
||||||
|
output_line_mean_len: bool,
|
||||||
|
output_line_median_len: bool,
|
||||||
// For tracking line lengths
|
// For tracking line lengths
|
||||||
line_lengths: Option<Vec<usize>>,
|
line_lengths: Option<Vec<usize>>,
|
||||||
current_line_length: usize,
|
current_line_length: usize,
|
||||||
|
// For incremental calculation of max and mean
|
||||||
|
max_line_length: usize,
|
||||||
|
total_line_length: usize,
|
||||||
|
line_count_for_stats: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TextMetaPlugin {
|
impl TextMetaPlugin {
|
||||||
@@ -82,8 +90,16 @@ impl TextMetaPlugin {
|
|||||||
track_word_count,
|
track_word_count,
|
||||||
track_line_count,
|
track_line_count,
|
||||||
track_line_lengths,
|
track_line_lengths,
|
||||||
|
// Set output flags
|
||||||
|
output_line_max_len: track_line_max_len,
|
||||||
|
output_line_mean_len: track_line_mean_len,
|
||||||
|
output_line_median_len: track_line_median_len,
|
||||||
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
|
line_lengths: if track_line_lengths { Some(Vec::new()) } else { None },
|
||||||
current_line_length: 0,
|
current_line_length: 0,
|
||||||
|
// Initialize incremental tracking for max and mean
|
||||||
|
max_line_length: 0,
|
||||||
|
total_line_length: 0,
|
||||||
|
line_count_for_stats: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,9 +162,20 @@ impl TextMetaPlugin {
|
|||||||
if self.track_line_lengths {
|
if self.track_line_lengths {
|
||||||
for ch in text.chars() {
|
for ch in text.chars() {
|
||||||
if ch == '\n' {
|
if ch == '\n' {
|
||||||
|
// Update max line length
|
||||||
|
if self.current_line_length > self.max_line_length {
|
||||||
|
self.max_line_length = self.current_line_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update total for mean calculation
|
||||||
|
self.total_line_length += self.current_line_length;
|
||||||
|
self.line_count_for_stats += 1;
|
||||||
|
|
||||||
|
// Store each line's length whenever the vector is allocated (only the median calculation reads it)
|
||||||
if let Some(ref mut lengths) = self.line_lengths {
|
if let Some(ref mut lengths) = self.line_lengths {
|
||||||
lengths.push(self.current_line_length);
|
lengths.push(self.current_line_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.current_line_length = 0;
|
self.current_line_length = 0;
|
||||||
} else {
|
} else {
|
||||||
self.current_line_length += 1;
|
self.current_line_length += 1;
|
||||||
@@ -199,6 +226,16 @@ impl TextMetaPlugin {
|
|||||||
|
|
||||||
// Handle the last line if tracking line lengths
|
// Handle the last line if tracking line lengths
|
||||||
if self.track_line_lengths && self.current_line_length > 0 {
|
if self.track_line_lengths && self.current_line_length > 0 {
|
||||||
|
// Update max line length for the last line
|
||||||
|
if self.current_line_length > self.max_line_length {
|
||||||
|
self.max_line_length = self.current_line_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update total for mean calculation for the last line
|
||||||
|
self.total_line_length += self.current_line_length;
|
||||||
|
self.line_count_for_stats += 1;
|
||||||
|
|
||||||
|
// Store the last line's length whenever the vector is allocated (only the median calculation reads it)
|
||||||
if let Some(ref mut lengths) = self.line_lengths {
|
if let Some(ref mut lengths) = self.line_lengths {
|
||||||
lengths.push(self.current_line_length);
|
lengths.push(self.current_line_length);
|
||||||
}
|
}
|
||||||
@@ -237,65 +274,49 @@ impl TextMetaPlugin {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Output line length statistics if tracked
|
// Output line length statistics if tracked
|
||||||
if self.track_line_lengths {
|
if self.track_line_lengths && self.line_count_for_stats > 0 {
|
||||||
if let Some(lengths) = &self.line_lengths {
|
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}",
|
||||||
log::debug!("TEXT: Line lengths: {:?} (count: {})", lengths, lengths.len());
|
self.output_line_max_len, self.output_line_mean_len, self.output_line_median_len);
|
||||||
if !lengths.is_empty() {
|
|
||||||
// Calculate max, mean, median
|
// Calculate and output max line length if enabled
|
||||||
let max_len = lengths.iter().max().unwrap();
|
if self.output_line_max_len {
|
||||||
let sum: usize = lengths.iter().sum();
|
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||||
let mean_len = sum as f64 / lengths.len() as f64;
|
"text_line_max_len",
|
||||||
|
self.max_line_length.to_string(),
|
||||||
let mut sorted_lengths = lengths.clone();
|
self.base.outputs()
|
||||||
sorted_lengths.sort();
|
) {
|
||||||
let median_len = if lengths.len() % 2 == 0 {
|
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
|
||||||
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
|
metadata.push(meta_data);
|
||||||
} else {
|
}
|
||||||
sorted_lengths[lengths.len() / 2] as f64
|
}
|
||||||
};
|
|
||||||
|
// Calculate and output mean line length if enabled
|
||||||
log::debug!("TEXT: Line stats - max: {}, mean: {}, median: {}", max_len, mean_len, median_len);
|
if self.output_line_mean_len {
|
||||||
|
let mean_len = self.total_line_length as f64 / self.line_count_for_stats as f64;
|
||||||
// Check if each statistic should be output based on options
|
// Round to nearest integer
|
||||||
// Default to true if option is not present
|
let mean_len_int = mean_len.round() as usize;
|
||||||
let output_max = self.base.options.get("text_line_max_len")
|
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||||
.and_then(|v| v.as_bool())
|
"text_line_mean_len",
|
||||||
.unwrap_or(true);
|
mean_len_int.to_string(),
|
||||||
let output_mean = self.base.options.get("text_line_mean_len")
|
self.base.outputs()
|
||||||
.and_then(|v| v.as_bool())
|
) {
|
||||||
.unwrap_or(true);
|
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
|
||||||
let output_median = self.base.options.get("text_line_median_len")
|
metadata.push(meta_data);
|
||||||
.and_then(|v| v.as_bool())
|
}
|
||||||
.unwrap_or(true);
|
}
|
||||||
|
|
||||||
log::debug!("TEXT: Output flags - max: {}, mean: {}, median: {}", output_max, output_mean, output_median);
|
// Calculate and output median line length if enabled
|
||||||
|
if self.output_line_median_len {
|
||||||
// Add each statistic if enabled
|
if let Some(lengths) = &self.line_lengths {
|
||||||
if output_max {
|
if !lengths.is_empty() {
|
||||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
let mut sorted_lengths = lengths.clone();
|
||||||
"text_line_max_len",
|
sorted_lengths.sort();
|
||||||
max_len.to_string(),
|
let median_len = if lengths.len() % 2 == 0 {
|
||||||
self.base.outputs()
|
(sorted_lengths[lengths.len() / 2 - 1] + sorted_lengths[lengths.len() / 2]) as f64 / 2.0
|
||||||
) {
|
} else {
|
||||||
log::debug!("TEXT: Adding max line length metadata: {:?}", meta_data);
|
sorted_lengths[lengths.len() / 2] as f64
|
||||||
metadata.push(meta_data);
|
};
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if output_mean {
|
|
||||||
// Round to nearest integer
|
|
||||||
let mean_len_int = mean_len.round() as usize;
|
|
||||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
|
||||||
"text_line_mean_len",
|
|
||||||
mean_len_int.to_string(),
|
|
||||||
self.base.outputs()
|
|
||||||
) {
|
|
||||||
log::debug!("TEXT: Adding mean line length metadata: {:?}", meta_data);
|
|
||||||
metadata.push(meta_data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if output_median {
|
|
||||||
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
if let Some(meta_data) = crate::meta_plugin::process_metadata_outputs(
|
||||||
"text_line_median_len",
|
"text_line_median_len",
|
||||||
median_len.to_string(),
|
median_len.to_string(),
|
||||||
@@ -304,15 +325,15 @@ impl TextMetaPlugin {
|
|||||||
log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data);
|
log::debug!("TEXT: Adding median line length metadata: {:?}", meta_data);
|
||||||
metadata.push(meta_data);
|
metadata.push(meta_data);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
log::debug!("TEXT: No line lengths recorded for median calculation");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log::debug!("TEXT: No line lengths recorded");
|
log::debug!("TEXT: Line lengths tracking is None for median calculation");
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
log::debug!("TEXT: Line lengths tracking is None");
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log::debug!("TEXT: Line lengths tracking is disabled");
|
log::debug!("TEXT: Line lengths tracking is disabled or no lines processed");
|
||||||
}
|
}
|
||||||
|
|
||||||
metadata
|
metadata
|
||||||
|
|||||||
Reference in New Issue
Block a user