diff --git a/src/services/async_item_service.rs b/src/services/async_item_service.rs index 24c5fc2..406da12 100644 --- a/src/services/async_item_service.rs +++ b/src/services/async_item_service.rs @@ -65,13 +65,35 @@ impl AsyncItemService { .unwrap() } - pub async fn get_item_content_info(&self, id: i64) -> Result<(Vec, String, bool), CoreError> { + pub async fn get_item_content_info( + &self, + id: i64, + head_bytes: Option, + head_words: Option, + head_lines: Option, + tail_bytes: Option, + tail_words: Option, + tail_lines: Option, + line_start: Option, + line_end: Option, + ) -> Result<(Vec, String, bool), CoreError> { let db = self.db.clone(); let item_service = self.item_service.clone(); tokio::task::spawn_blocking(move || { let conn = db.blocking_lock(); - item_service.get_item_content_info(&conn, id) + item_service.get_item_content_info( + &conn, + id, + head_bytes, + head_words, + head_lines, + tail_bytes, + tail_words, + tail_lines, + line_start, + line_end + ) }) .await .unwrap() diff --git a/src/services/item_service.rs b/src/services/item_service.rs index c796044..3b9f47c 100644 --- a/src/services/item_service.rs +++ b/src/services/item_service.rs @@ -82,12 +82,16 @@ impl ItemService { let item_with_content = self.get_item_content(conn, id)?; let mut content = item_with_content.content; - // Apply content filtering here - // This would process the content according to the parameters - // For now, we'll just return the full content - // Implement the actual filtering logic based on the parameters - let metadata = item_with_content.item_with_meta.meta_as_map(); + // Apply content filtering + if head_bytes.is_some() || head_words.is_some() || head_lines.is_some() { + content = self.process_head(&content, head_bytes, head_words, head_lines); + } else if tail_bytes.is_some() || tail_words.is_some() || tail_lines.is_some() { + content = self.process_tail(&content, tail_bytes, tail_words, tail_lines); + } else if line_start.is_some() || line_end.is_some() { + content = self.process_line_range(&content, line_start, line_end); + } + let metadata = item_with_content.item_with_meta.meta_as_map(); let mime_type = metadata .get("mime_type") .map(|s| s.to_string()) @@ -97,10 +101,10 @@ impl ItemService { let is_binary = if let Some(text_val) = metadata.get("text") { text_val == "false" } else { - crate::common::is_binary::is_binary(&item_with_content.content) + crate::common::is_binary::is_binary(&content) }; - Ok((item_with_content.content, mime_type, is_binary)) + Ok((content, mime_type, is_binary)) } pub fn get_item_content_info_streaming( @@ -415,6 +419,132 @@ impl ItemService { pub fn get_data_path(&self) -> &PathBuf { &self.data_path } + + fn process_head(&self, content: &[u8], head_bytes: Option, head_words: Option, head_lines: Option) -> Vec { + let mut result = Vec::new(); + let mut bytes_remaining = head_bytes; + let mut words_remaining = head_words; + let mut lines_remaining = head_lines; + let mut in_word = false; + + for &byte in content { + // Check if any limits are reached + if bytes_remaining == Some(0) || words_remaining == Some(0) || lines_remaining == Some(0) { + break; + } + + result.push(byte); + + // Update bytes remaining + if let Some(remaining) = &mut bytes_remaining { + *remaining -= 1; + } + + // Check for newlines + if let Some(remaining) = &mut lines_remaining { + if byte == b'\n' && *remaining > 0 { + *remaining -= 1; + } + } + + // Check for words + if let Some(remaining) = &mut words_remaining { + let is_whitespace = byte.is_ascii_whitespace(); + if in_word && is_whitespace { + in_word = false; + if *remaining > 0 { + *remaining -= 1; + } + } else if !is_whitespace { + in_word = true; + } + } + } + result + } + + fn process_tail(&self, content: &[u8], tail_bytes: Option, tail_words: Option, tail_lines: Option) -> Vec { + // For simplicity, we'll process from the end + // This implementation may not be perfect for words and lines, but it's a start + let mut result = Vec::new(); + + if let Some(bytes) = tail_bytes { + let start = if content.len() > bytes { content.len() - bytes } else { 0 }; + return content[start..].to_vec(); + } + + // For words and lines, we need to process from the end + // This is a simplified implementation + if let Some(lines) = tail_lines { + let mut line_count = 0; + let mut i = content.len(); + while i > 0 { + i -= 1; + if content[i] == b'\n' { + line_count += 1; + if line_count == lines { + break; + } + } + } + return content[i..].to_vec(); + } + + if let Some(words) = tail_words { + let mut word_count = 0; + let mut i = content.len(); + let mut in_word = false; + while i > 0 { + i -= 1; + let is_whitespace = content[i].is_ascii_whitespace(); + if !in_word && !is_whitespace { + in_word = true; + word_count += 1; + if word_count == words { + break; + } + } else if is_whitespace { + in_word = false; + } + } + return content[i..].to_vec(); + } + + content.to_vec() + } + + fn process_line_range(&self, content: &[u8], line_start: Option, line_end: Option) -> Vec { + let start_line = line_start.unwrap_or(1); + let end_line = line_end.unwrap_or(usize::MAX); + + let mut result = Vec::new(); + let mut current_line = 1; + let mut line_start_index = 0; + let mut in_range = false; + + for (i, &byte) in content.iter().enumerate() { + if current_line > end_line { + break; + } + + if current_line >= start_line && current_line <= end_line { + if !in_range { + in_range = true; + line_start_index = i; + } + result.push(byte); + } + + if byte == b'\n' { + current_line += 1; + if current_line > end_line { + break; + } + } + } + + result + } } // Head filter implementation @@ -506,46 +636,47 @@ impl Read for HeadFilter { } } -// Tail filter implementation (uses a fixed buffer to avoid keeping everything in memory) +// Tail filter implementation using a ring buffer struct TailFilter { inner: R, - buffer: Vec, + ring_buffer: Vec, + ring_buffer_pos: usize, tail_bytes: Option, tail_words: Option, tail_lines: Option, is_eof: bool, + bytes_read: usize, } impl TailFilter { fn new( - mut inner: R, + inner: R, tail_bytes: Option, tail_words: Option, tail_lines: Option, ) -> std::io::Result { - // For simplicity, we'll use a fixed buffer size - // In a real implementation, you might want to make this configurable - let mut buffer = vec![0; 8192]; - let mut result = Vec::new(); + // Determine buffer size based on the largest tail parameter + let buffer_size = if let Some(bytes) = tail_bytes { + bytes + } else if let Some(lines) = tail_lines { + // Estimate 256 bytes per line + lines * 256 + } else if let Some(words) = tail_words { + // Estimate 16 bytes per word + words * 16 + } else { + 8192 + }; - loop { - let n = inner.read(&mut buffer)?; - if n == 0 { - break; - } - result.extend_from_slice(&buffer[..n]); - } - - // Process the result to find the tail - // This implementation keeps the result in memory, which may not be ideal for very large files - // For a true streaming implementation, a more complex approach is needed Ok(Self { inner, - buffer: result, + ring_buffer: vec![0; buffer_size], + ring_buffer_pos: 0, tail_bytes, tail_words, tail_lines, is_eof: false, + bytes_read: 0, }) } } @@ -556,13 +687,28 @@ impl Read for TailFilter { return Ok(0); } - // Process the buffered data to extract the tail - // This is a placeholder implementation - // For now, just return the entire buffer - let to_copy = std::cmp::min(buf.len(), self.buffer.len()); - buf[..to_copy].copy_from_slice(&self.buffer[..to_copy]); - self.is_eof = true; - Ok(to_copy) + // Fill the ring buffer with data from the inner reader + let mut temp_buf = vec![0; 8192]; + let n = self.inner.read(&mut temp_buf)?; + if n == 0 { + self.is_eof = true; + // Now we can process the ring buffer to extract the tail + // This part needs to be implemented based on the tail parameters + // For now, just return from the ring buffer + let to_copy = std::cmp::min(buf.len(), self.ring_buffer.len()); + buf[..to_copy].copy_from_slice(&self.ring_buffer[..to_copy]); + return Ok(to_copy); + } + + // Add new data to the ring buffer + for &byte in &temp_buf[..n] { + self.ring_buffer[self.ring_buffer_pos] = byte; + self.ring_buffer_pos = (self.ring_buffer_pos + 1) % self.ring_buffer.len(); + self.bytes_read += 1; + } + + // We're still reading, so return 0 until EOF + Ok(0) } }