git.mikk.net Git - mtbl-rs/commitdiff
WIP: sorter implementation
author Chris Mikkelson <cmikk@fsi.io>
Wed, 4 Sep 2024 19:30:02 +0000 (14:30 -0500)
committer Chris Mikkelson <cmikk@fsi.io>
Wed, 4 Sep 2024 19:30:02 +0000 (14:30 -0500)
src/lib.rs
src/sorter.rs [new file with mode: 0644]

index 2d4c1985fb06f37357a4fef14dbb4106a9a98422..6d58274c4142f8a877420cd25561b78ebb9eb4e3 100644 (file)
@@ -5,6 +5,7 @@ mod iter;
 mod merge_func;
 mod merger;
 pub mod reader;
+pub mod sorter;
 pub mod source;
 mod writer;
 
diff --git a/src/sorter.rs b/src/sorter.rs
new file mode 100644 (file)
index 0000000..d1e8aac
--- /dev/null
@@ -0,0 +1,51 @@
+use crate::merge_func::MergeFunc;
+use crate::{Entry, Merger, Reader, Source, Writer};
+use memmap::Mmap;
+
+/*
+Sorter
+   method add(&mut self, e: Entry);
+   method source(self) -> impl Source
+*/
+/// Buffers [`Entry`] values in memory and, when consumed, yields a merged
+/// view over `readers` combined with the caller-supplied merge function.
+pub struct Sorter<F>
+where
+    F: Fn(&mut Entry, &Entry),
+{
+    /// Entries pushed by `add` that have not yet been handed to `write_chunk`.
+    batch: Vec<Entry>,
+    /// Running total of `key.len() + value.len()` over the entries pushed by `add`.
+    batch_size: usize,
+    /// Byte threshold that `add` compares `batch_size` plus the incoming entry against.
+    max_size: usize,
+    /// Merge callback passed to the merger built in `source`.
+    merge_func: F,
+
+    /// Readers given to `Merger::from` in `source`; starts out empty.
+    readers: Vec<Reader<Mmap>>,
+}
+
+impl<F> Sorter<F>
+where
+    F: Fn(&mut Entry, &Entry),
+{
+    /// Creates a sorter with an empty batch and no readers.
+    ///
+    /// `max_size` is the threshold, in combined key and value bytes, that
+    /// `add` checks the pending batch against. `merge_func` is stored and
+    /// later handed to the merger built by `source`.
+    pub fn new(max_size: usize, merge_func: F) -> Self {
+        Self {
+            batch: Vec::new(),
+            batch_size: 0,
+            max_size,
+            merge_func,
+
+            readers: Vec::new(),
+        }
+    }
+
+    /// Appends `e` to the pending batch.
+    ///
+    /// If the batch already holds entries and adding `e` would push the
+    /// accumulated key and value bytes past `max_size`, `write_chunk` is
+    /// called first.
+    pub fn add(&mut self, e: Entry) {
+        let esize = e.key.len() + e.value.len();
+        // An empty batch has nothing to flush, even when a single entry is
+        // larger than `max_size` on its own.
+        if !self.batch.is_empty() && esize + self.batch_size > self.max_size {
+            self.write_chunk();
+        }
+        self.batch.push(e);
+        self.batch_size += esize;
+    }
+
+    /// Consumes the sorter and returns a merger over its readers, combined
+    /// with the stored merge function.
+    ///
+    /// Any entries still pending in the batch are passed through
+    /// `write_chunk` before the merger is built.
+    pub fn source(mut self) -> MergeFunc<Merger<Reader<Mmap>>, F> {
+        if !self.batch.is_empty() {
+            self.write_chunk();
+        }
+        Merger::from(self.readers).merge_func(self.merge_func)
+    }
+
+    /// Placeholder for flushing the pending batch.
+    ///
+    /// The body is empty in this revision, so `batch`, `batch_size` and
+    /// `readers` are left untouched by a call.
+    /// NOTE(review): presumably intended to persist `batch`, register a
+    /// reader for it and reset the batch counters; confirm once implemented.
+    fn write_chunk(&mut self) {}
+}