From: Chris Mikkelson Date: Fri, 2 Aug 2024 23:42:45 +0000 (-0500) Subject: Refactor block, block_builder for readability, magic numbers X-Git-Url: https://git.mikk.net/?a=commitdiff_plain;h=e28f7432555e6a607b4d533d9d54c83498a6916a;p=mtbl-rs Refactor block, block_builder for readability, magic numbers Fix "off by 4" bug caused by double call to block builder finish. --- diff --git a/src/reader/block.rs b/src/reader/block.rs index e186936..9859095 100644 --- a/src/reader/block.rs +++ b/src/reader/block.rs @@ -1,4 +1,5 @@ use crate::{Entry, Iter}; +use std::mem::size_of; use std::sync::Arc; type Error = Box; @@ -14,7 +15,7 @@ impl<'b> Restarts<'b> { fn decode(&self, ridx: usize) -> Result { match self { Restarts::Restart32(r) => { - let len = 4; + let len = size_of::(); let start = ridx * len; let end = start + len; if end > r.len() { @@ -24,7 +25,7 @@ impl<'b> Restarts<'b> { } } Restarts::Restart64(r) => { - let len = 8; + let len = size_of::(); let start = ridx * len; let end = start + len; if end > r.len() { @@ -45,35 +46,32 @@ pub(crate) struct Block<'b> { impl<'b> Block<'b> { pub(crate) fn new(data: &'b [u8]) -> Result { - let mut dlen = data.len(); - - if data.len() < 4 { + if data.len() < size_of::() { return Err("block data too short".into()); } - dlen -= 4; + let rc_off = data.len() - size_of::(); - let nrestarts = u32::from_be_bytes(data[dlen..].try_into()?) as usize; + let nrestarts = u32::from_be_bytes(data[rc_off..].try_into()?) as usize; // try 32-bit restarts - let mut len_restarts = nrestarts * 4; - if len_restarts > dlen { + if (nrestarts * size_of::()) > rc_off { return Err("block data too short 2".into()); } - let mut dlen = dlen - len_restarts; + let mut r_off = rc_off - (nrestarts * size_of::()); - let restarts = if dlen <= u32::MAX as usize { - Restarts::Restart32(&data[dlen..data.len() - 4]) + let restarts = if r_off <= u32::MAX as usize { + Restarts::Restart32(&data[r_off..rc_off]) } else { // try 64-bit restarts - dlen -= len_restarts; - len_restarts *= 2; - if len_restarts > dlen { + if (nrestarts * size_of::()) > rc_off { return Err("block data too short 3".into()); } - Restarts::Restart64(&data[dlen..data.len() - 4]) + r_off = rc_off - (nrestarts * size_of::()); + Restarts::Restart64(&data[r_off..rc_off]) }; + println!("Block::new() -- r_off = {}", r_off); Ok(Block { - data: &data[..dlen - 4], // XXX - debugme -- off by 4 + data: &data[..r_off], restarts, }) } diff --git a/src/writer/block_builder.rs b/src/writer/block_builder.rs index 05574c1..41f7d7d 100644 --- a/src/writer/block_builder.rs +++ b/src/writer/block_builder.rs @@ -1,3 +1,5 @@ +use std::mem::size_of; + pub struct BlockBuilder { prev_key: Vec, data: Vec, @@ -64,9 +66,9 @@ impl BlockBuilder { fn restart_size(&self) -> usize { if self.data.len() > u32::MAX as usize { - 4 + size_of::() } else { - 8 + size_of::() } } @@ -74,7 +76,7 @@ impl BlockBuilder { if self.finished { self.data.len() } else { - self.data.len() + self.restarts.len() * self.restart_size() + 4 + self.data.len() + self.restarts.len() * self.restart_size() + size_of::() } } @@ -82,10 +84,12 @@ impl BlockBuilder { if self.finished { return self.data.as_slice(); } + self.finished = true; let num_restarts = self.restarts.len(); assert!(num_restarts <= u32::MAX as usize); - self.data.reserve(num_restarts * self.restart_size() + 4); + self.data + .reserve(num_restarts * self.restart_size() + size_of::()); match self.restart_size() { 4 => { for b in self.restarts.iter().map(|r| u32::to_be_bytes(*r as u32)) {