git.mikk.net Git - mtbl-rs/commitdiff
WIP: working reader, with test
author Chris Mikkelson <cmikk@fsi.io>
Wed, 7 Aug 2024 06:53:10 +0000 (00:53 -0600)
committer Chris Mikkelson <cmikk@fsi.io>
Wed, 7 Aug 2024 06:53:10 +0000 (00:53 -0600)
Squashed several bugs, will have to squash `.expect()`s next.

src/reader/block.rs
src/reader/mod.rs
tests/rwtest.rs [new file with mode: 0644]

index 9859047d786a5452450bd97693ea0a51b4a39739..b2bf25c53ebf9280847b6d05937bf2329bfe23cc 100644 (file)
@@ -38,6 +38,7 @@ impl RestartType {
 #[derive(Debug)]
 pub(crate) struct Block<D: AsRef<[u8]>> {
     data: DataSlice<D>,
+    restart_count: usize,
     restart_off: usize,
     restart_type: RestartType,
 }
@@ -49,13 +50,13 @@ impl<D: AsRef<[u8]>> Block<D> {
         }
         let rc_off = data.as_ref().len() - size_of::<u32>();
 
-        let nrestarts = u32::from_be_bytes(data.as_ref()[rc_off..].try_into()?) as usize;
+        let restart_count = u32::from_be_bytes(data.as_ref()[rc_off..].try_into()?) as usize;
 
         // try 32-bit restarts
-        if (nrestarts * size_of::<u32>()) > rc_off {
+        if (restart_count * size_of::<u32>()) > rc_off {
             return Err("block data too short 2".into());
         }
-        let r_off = rc_off - (nrestarts * size_of::<u32>());
+        let r_off = rc_off - (restart_count * size_of::<u32>());
         let restart_type: RestartType;
         let restart_off: usize;
 
@@ -65,10 +66,11 @@ impl<D: AsRef<[u8]>> Block<D> {
         } else {
             // try 64-bit restarts
             restart_type = RestartType::U64;
-            restart_off = rc_off - nrestarts * size_of::<u64>();
+            restart_off = rc_off - restart_count * size_of::<u64>();
         };
         Ok(Block {
             data,
+            restart_count,
             restart_type,
             restart_off,
         })
@@ -180,5 +182,25 @@ impl<D: AsRef<[u8]>> Iterator for BlockIter<D> {
 }
 
 impl<D: AsRef<[u8]>> Iter for BlockIter<D> {
-    fn seek(&mut self, _key: &[u8]) {}
+    fn seek(&mut self, key: &[u8]) {
+        // TODO: "galloping search"
+        let mut left: usize = 0;
+        let mut right: usize = self.block.restart_count;
+        while left < right {
+            let mid = left + (right - left) / 2;
+            self.seek_restart(mid)
+                .and_then(|()| {
+                    if self.cur_ent.as_ref().unwrap().key.as_slice() > key {
+                        right = mid;
+                    } else {
+                        left = mid + 1;
+                    }
+                    Some(())
+                })
+                .or_else(|| {
+                    left = right; // breaks loop
+                    Some(())
+                });
+        }
+    }
 }
index 75200980b87f751736d1a59d4ff1feb0b8f7a517..4e1178719d2fb8aba89fdb8c4bae2934fca9f934 100644 (file)
@@ -1,8 +1,10 @@
 use crate::metadata::Metadata;
+use crate::source::{DefaultSource, IterSource};
 use crate::Entry;
 use crate::Iter;
-use integer_encoding::VarInt;
+use integer_encoding::{FixedInt, VarInt};
 pub(crate) mod block;
+use std::io::{Cursor, Seek, SeekFrom};
 use std::sync::Arc;
 
 // interface for a range
@@ -52,12 +54,22 @@ fn test_data_slice() {
     assert_eq!(cr.as_ref(), vec![2u8, 3]);
 }
 
-struct Reader<D: AsRef<[u8]>> {
+pub struct Reader<D: AsRef<[u8]>> {
     data: DataSlice<D>,
     metadata: Metadata,
 }
 
 impl<D: AsRef<[u8]>> Reader<D> {
+    pub fn new(d: D) -> Self {
+        let mut cur = Cursor::new(&d.as_ref()[d.as_ref().len() - 512..]);
+        cur.seek(SeekFrom::End(-512)).expect("bad seek");
+        let metadata = Metadata::read_from(cur).expect("bad meta");
+        Self {
+            data: DataSlice::new(d),
+            metadata,
+        }
+    }
+
     fn index_iter(&self) -> block::BlockIter<D> {
         block::Block::new(self.data.clone_range(
             self.metadata.index_block_offset,
@@ -68,7 +80,7 @@ impl<D: AsRef<[u8]>> Reader<D> {
     }
 }
 
-struct ReaderIter<'r, D: AsRef<[u8]>> {
+pub struct ReaderIter<'r, D: AsRef<[u8]>> {
     reader: &'r Reader<D>,
     next_offset: usize,
     index_iter: block::BlockIter<D>,
@@ -88,17 +100,22 @@ impl<D: AsRef<[u8]>> Reader<D> {
 
 impl<'r, D: AsRef<[u8]>> ReaderIter<'r, D> {
     fn next_block(&mut self) -> Option<()> {
-        if self.next_offset >= self.reader.data.len() {
+        if self.next_offset >= self.reader.metadata.index_block_offset {
             return None;
         }
-        let (size, len_size) = usize::decode_var(&self.reader.data.as_ref()[self.next_offset..])?;
+        let (size, len_size) = usize::decode_var(&self.reader.data.as_ref()[self.next_offset..])
+            .expect("bad block size");
         let crc_off = self.next_offset + len_size;
-        let (_crc, len_crc) = u32::decode_var(&self.reader.data.as_ref()[crc_off..])?;
-        let data_off = crc_off + len_crc;
-        self.next_offset += data_off + size;
+        let data_off = crc_off + std::mem::size_of::<u32>();
+        let _crc = u32::from_be_bytes(
+            self.reader.data.as_ref()[crc_off..crc_off + 4]
+                .try_into()
+                .ok()?,
+        );
+        self.next_offset = data_off + size;
         self.data_iter.replace(
             block::Block::new(self.reader.data.clone_range(data_off, size))
-                .ok()?
+                .expect("bad block")
                 .into_iter(),
         );
         Some(())
@@ -109,8 +126,8 @@ impl<'r, D: AsRef<[u8]>> Iterator for ReaderIter<'r, D> {
     type Item = Entry;
     fn next(&mut self) -> Option<Self::Item> {
         if self.data_iter.is_none() {
-            self.next_block();
-            self.data_iter.as_mut().unwrap().next()
+            self.next_block()
+                .and_then(|_| self.data_iter.as_mut().unwrap().next())
         } else {
             match self.data_iter.as_mut().unwrap().next() {
                 Some(e) => Some(e),
@@ -136,3 +153,17 @@ impl<'r, D: AsRef<[u8]>> Iter for ReaderIter<'r, D> {
         });
     }
 }
+
+impl<'r, D: AsRef<[u8]>> IterSource for &'r Reader<D> {
+    type It = ReaderIter<'r, D>;
+    fn iter(&self) -> Self::It {
+        ReaderIter {
+            reader: self,
+            next_offset: 0,
+            index_iter: self.index_iter(),
+            data_iter: None,
+        }
+    }
+}
+
+impl<'r, D: AsRef<[u8]>> DefaultSource for &'r Reader<D> {}
diff --git a/tests/rwtest.rs b/tests/rwtest.rs
new file mode 100644 (file)
index 0000000..a378f21
--- /dev/null
@@ -0,0 +1,22 @@
+use mtbl::entry::Entry;
+use mtbl::reader::Reader;
+use mtbl::writer::Writer;
+
+#[test]
+fn test_write_readback() {
+    let mut store = Vec::<u8>::new();
+    let mut reference = Vec::<Entry>::new();
+    {
+        let mut w = Writer::new(&mut store).blocksize(128);
+        for i in 1..10 {
+            let e = Entry::new(u32::to_be_bytes(i), u32::to_be_bytes(i * 1024));
+            w.add(e.clone()).expect("add failed");
+            reference.push(e);
+        }
+    }
+
+    assert!(store.len() > 512);
+    let r = Reader::new(&store);
+    let ri = r.iter();
+    assert_eq!(ri.collect::<Vec<Entry>>(), reference);
+}