From a91888a377743ae39ad162852d3ad4773e0a26f6 Mon Sep 17 00:00:00 2001
From: Chris Mikkelson
Date: Wed, 4 Sep 2024 14:30:02 -0500
Subject: [PATCH] WIP: sorter implementation

---
 src/lib.rs    |  1 +
 src/sorter.rs | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 src/sorter.rs

diff --git a/src/lib.rs b/src/lib.rs
index 2d4c198..6d58274 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,7 @@ mod iter;
 mod merge_func;
 mod merger;
 pub mod reader;
+pub mod sorter;
 pub mod source;
 mod writer;
 
diff --git a/src/sorter.rs b/src/sorter.rs
new file mode 100644
index 0000000..d1e8aac
--- /dev/null
+++ b/src/sorter.rs
@@ -0,0 +1,75 @@
+use crate::merge_func::MergeFunc;
+use crate::{Entry, Merger, Reader, Source, Writer};
+use memmap::Mmap;
+
+/// Accumulates entries in batches and exposes them as a merged source.
+///
+/// Planned interface:
+/// - `add(&mut self, e: Entry)`
+/// - `source(self) -> impl Source`
+///
+/// `F` is the function handed to the merger built by `source`.
+pub struct Sorter<F> {
+    /// Entries pushed by `add`.
+    batch: Vec<Entry>,
+    /// Running total of `key.len() + value.len()` over the pushed entries.
+    batch_size: usize,
+    /// Limit on `batch_size` beyond which `add` calls `write_chunk`.
+    max_size: usize,
+    /// Handed to the merger built by `source`.
+    merge_func: F,
+
+    /// Inputs of the final merge; nothing fills this yet.
+    readers: Vec<Reader<Mmap>>,
+}
+
+impl<F> Sorter<F>
+where
+    F: Fn(&mut Entry, &Entry),
+{
+    /// Creates a sorter with an empty batch and no readers.
+    ///
+    /// `max_size` is the limit checked by `add`; `merge_func` is handed to
+    /// the merger built by `source`.
+    pub fn new(max_size: usize, merge_func: F) -> Self {
+        Self {
+            batch: Vec::new(),
+            batch_size: 0,
+            max_size,
+            merge_func,
+
+            readers: Vec::new(),
+        }
+    }
+
+    /// Buffers `e`, calling `write_chunk` first if it would pass `max_size`.
+    ///
+    /// The size of an entry is `key.len() + value.len()`.
+    pub fn add(&mut self, e: Entry) {
+        let esize = e.key.len() + e.value.len();
+        // Nothing to write when the batch is empty, even if `e` alone is
+        // larger than `max_size`.
+        if !self.batch.is_empty() && esize + self.batch_size > self.max_size {
+            self.write_chunk();
+        }
+        self.batch.push(e);
+        self.batch_size += esize;
+    }
+
+    /// Consumes the sorter and returns a merger over `readers` using
+    /// `merge_func`.
+    ///
+    /// A non-empty batch is passed through `write_chunk` first.
+    pub fn source(mut self) -> MergeFunc<Merger<Reader<Mmap>>, F> {
+        if !self.batch.is_empty() {
+            self.write_chunk();
+        }
+        Merger::from(self.readers).merge_func(self.merge_func)
+    }
+
+    /// Placeholder, currently a no-op.
+    ///
+    /// TODO(WIP): presumably meant to write `batch` out, reset it, and add a
+    /// reader for the written chunk to `readers`.
+    fn write_chunk(&mut self) {}
+}
-- 
2.50.1