From 17c7b351f0e53ad2253b19c25d8133bf33d0c063 Mon Sep 17 00:00:00 2001 From: Chris Mikkelson Date: Wed, 7 Aug 2024 00:53:10 -0600 Subject: [PATCH] WIP: working reader, with test Squashed several bugs, will have to squash `.expect()`s next. --- src/reader/block.rs | 32 ++++++++++++++++++++++----- src/reader/mod.rs | 53 +++++++++++++++++++++++++++++++++++---------- tests/rwtest.rs | 22 +++++++++++++++++++ 3 files changed, 91 insertions(+), 16 deletions(-) create mode 100644 tests/rwtest.rs diff --git a/src/reader/block.rs b/src/reader/block.rs index 9859047..b2bf25c 100644 --- a/src/reader/block.rs +++ b/src/reader/block.rs @@ -38,6 +38,7 @@ impl RestartType { #[derive(Debug)] pub(crate) struct Block> { data: DataSlice, + restart_count: usize, restart_off: usize, restart_type: RestartType, } @@ -49,13 +50,13 @@ impl> Block { } let rc_off = data.as_ref().len() - size_of::(); - let nrestarts = u32::from_be_bytes(data.as_ref()[rc_off..].try_into()?) as usize; + let restart_count = u32::from_be_bytes(data.as_ref()[rc_off..].try_into()?) as usize; // try 32-bit restarts - if (nrestarts * size_of::()) > rc_off { + if (restart_count * size_of::()) > rc_off { return Err("block data too short 2".into()); } - let r_off = rc_off - (nrestarts * size_of::()); + let r_off = rc_off - (restart_count * size_of::()); let restart_type: RestartType; let restart_off: usize; @@ -65,10 +66,11 @@ impl> Block { } else { // try 64-bit restarts restart_type = RestartType::U64; - restart_off = rc_off - nrestarts * size_of::(); + restart_off = rc_off - restart_count * size_of::(); }; Ok(Block { data, + restart_count, restart_type, restart_off, }) @@ -180,5 +182,25 @@ impl> Iterator for BlockIter { } impl> Iter for BlockIter { - fn seek(&mut self, _key: &[u8]) {} + fn seek(&mut self, key: &[u8]) { + // TODO: "galloping search" + let mut left: usize = 0; + let mut right: usize = self.block.restart_count; + while left < right { + let mid = left + (right - left) / 2; + self.seek_restart(mid) + .and_then(|()| { + if self.cur_ent.as_ref().unwrap().key.as_slice() > key { + right = mid; + } else { + left = mid + 1; + } + Some(()) + }) + .or_else(|| { + left = right; // breaks loop + Some(()) + }); + } + } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 7520098..4e11787 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1,8 +1,10 @@ use crate::metadata::Metadata; +use crate::source::{DefaultSource, IterSource}; use crate::Entry; use crate::Iter; -use integer_encoding::VarInt; +use integer_encoding::{FixedInt, VarInt}; pub(crate) mod block; +use std::io::{Cursor, Seek, SeekFrom}; use std::sync::Arc; // interface for a range @@ -52,12 +54,22 @@ fn test_data_slice() { assert_eq!(cr.as_ref(), vec![2u8, 3]); } -struct Reader> { +pub struct Reader> { data: DataSlice, metadata: Metadata, } impl> Reader { + pub fn new(d: D) -> Self { + let mut cur = Cursor::new(&d.as_ref()[d.as_ref().len() - 512..]); + cur.seek(SeekFrom::End(-512)).expect("bad seek"); + let metadata = Metadata::read_from(cur).expect("bad meta"); + Self { + data: DataSlice::new(d), + metadata, + } + } + fn index_iter(&self) -> block::BlockIter { block::Block::new(self.data.clone_range( self.metadata.index_block_offset, @@ -68,7 +80,7 @@ impl> Reader { } } -struct ReaderIter<'r, D: AsRef<[u8]>> { +pub struct ReaderIter<'r, D: AsRef<[u8]>> { reader: &'r Reader, next_offset: usize, index_iter: block::BlockIter, @@ -88,17 +100,22 @@ impl> Reader { impl<'r, D: AsRef<[u8]>> ReaderIter<'r, D> { fn next_block(&mut self) -> Option<()> { - if self.next_offset >= self.reader.data.len() { + if self.next_offset >= self.reader.metadata.index_block_offset { return None; } - let (size, len_size) = usize::decode_var(&self.reader.data.as_ref()[self.next_offset..])?; + let (size, len_size) = usize::decode_var(&self.reader.data.as_ref()[self.next_offset..]) + .expect("bad block size"); let crc_off = self.next_offset + len_size; - let (_crc, len_crc) = u32::decode_var(&self.reader.data.as_ref()[crc_off..])?; - let data_off = crc_off + len_crc; - self.next_offset += data_off + size; + let data_off = crc_off + std::mem::size_of::(); + let _crc = u32::from_be_bytes( + self.reader.data.as_ref()[crc_off..crc_off + 4] + .try_into() + .ok()?, + ); + self.next_offset = data_off + size; self.data_iter.replace( block::Block::new(self.reader.data.clone_range(data_off, size)) - .ok()? + .expect("bad block") .into_iter(), ); Some(()) @@ -109,8 +126,8 @@ impl<'r, D: AsRef<[u8]>> Iterator for ReaderIter<'r, D> { type Item = Entry; fn next(&mut self) -> Option { if self.data_iter.is_none() { - self.next_block(); - self.data_iter.as_mut().unwrap().next() + self.next_block() + .and_then(|_| self.data_iter.as_mut().unwrap().next()) } else { match self.data_iter.as_mut().unwrap().next() { Some(e) => Some(e), @@ -136,3 +153,17 @@ impl<'r, D: AsRef<[u8]>> Iter for ReaderIter<'r, D> { }); } } + +impl<'r, D: AsRef<[u8]>> IterSource for &'r Reader { + type It = ReaderIter<'r, D>; + fn iter(&self) -> Self::It { + ReaderIter { + reader: self, + next_offset: 0, + index_iter: self.index_iter(), + data_iter: None, + } + } +} + +impl<'r, D: AsRef<[u8]>> DefaultSource for &'r Reader {} diff --git a/tests/rwtest.rs b/tests/rwtest.rs new file mode 100644 index 0000000..a378f21 --- /dev/null +++ b/tests/rwtest.rs @@ -0,0 +1,22 @@ +use mtbl::entry::Entry; +use mtbl::reader::Reader; +use mtbl::writer::Writer; + +#[test] +fn test_write_readback() { + let mut store = Vec::::new(); + let mut reference = Vec::::new(); + { + let mut w = Writer::new(&mut store).blocksize(128); + for i in 1..10 { + let e = Entry::new(u32::to_be_bytes(i), u32::to_be_bytes(i * 1024)); + w.add(e.clone()).expect("add failed"); + reference.push(e); + } + } + + assert!(store.len() > 512); + let r = Reader::new(&store); + let ri = r.iter(); + assert_eq!(ri.collect::>(), reference); +} -- 2.50.1