From b2441bc245b7269871482e50a462b19a5702109b Mon Sep 17 00:00:00 2001 From: Chris Mikkelson Date: Sat, 19 Oct 2024 14:22:33 +0200 Subject: [PATCH] Make Iter, Source generic on item type This is to allow mapping items to higher level types while preserving seek functionality. --- src/dupsort_func.rs | 51 ++++++++++++++++++++++++++-------------- src/entry.rs | 31 +++++++++++++++++++++++- src/filter.rs | 34 ++++++++++++++++++--------- src/iter.rs | 57 ++++++++++++++++++++++++++++----------------- src/merge_func.rs | 13 ++++++----- src/merger.rs | 20 ++++++++-------- src/reader/mod.rs | 4 +++- src/sorter.rs | 2 +- src/source.rs | 30 +++++++++++++++++------- 9 files changed, 165 insertions(+), 77 deletions(-) diff --git a/src/dupsort_func.rs b/src/dupsort_func.rs index 589e8ed..c029c29 100644 --- a/src/dupsort_func.rs +++ b/src/dupsort_func.rs @@ -1,10 +1,11 @@ -use crate::{Entry, Iter, Source}; +use crate::entry::HasPrefix; +use crate::{Iter, Source}; use std::cmp::Ordering; pub struct DupsortFunc where S: Source, - F: Fn(&Entry, &Entry) -> Ordering, + F: Fn(&S::Item, &S::Item) -> Ordering, { source: S, dupsort_func: F, @@ -13,7 +14,7 @@ where impl DupsortFunc where S: Source, - F: Fn(&Entry, &Entry) -> Ordering, + F: Fn(&S::Item, &S::Item) -> Ordering, { pub fn new(source: S, dupsort_func: F) -> Self { Self { @@ -26,20 +27,31 @@ where impl Source for DupsortFunc where S: Source, - F: Fn(&Entry, &Entry) -> Ordering, + S::Item: PartialEq, + F: Fn(&S::Item, &S::Item) -> Ordering, { - fn iter(&self) -> impl Iter { + type Item = S::Item; + fn iter(&self) -> impl Iter { self.source.iter().dupsort_func(&self.dupsort_func) } - fn get(&self, key: &[u8]) -> impl Iter { + fn get(&self, key: &[u8]) -> impl Iter + where + S::Item: PartialOrd<[u8]>, + { self.source.get(key).dupsort_func(&self.dupsort_func) } - fn get_prefix(&self, prefix: &[u8]) -> impl Iter { + fn get_prefix(&self, prefix: &[u8]) -> impl Iter + where + S::Item: HasPrefix, + { self.source .get_prefix(prefix) .dupsort_func(&self.dupsort_func) } - fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { + fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter + where + S::Item: PartialOrd<[u8]>, + { self.source .get_range(start, end) .dupsort_func(&self.dupsort_func) @@ -49,17 +61,19 @@ where #[derive(Debug)] pub struct DupsortFuncIter<'a, I, F> where - F: Fn(&Entry, &Entry) -> Ordering, + I: Iter, + F: Fn(&I::Item, &I::Item) -> Ordering, { - run: Vec, - next: Option, + run: Vec, + next: Option, iter: I, dupsort_func: &'a F, } impl<'a, I, F> DupsortFuncIter<'a, I, F> where - F: Fn(&Entry, &Entry) -> Ordering, + I: Iter, + F: Fn(&I::Item, &I::Item) -> Ordering, { pub fn new(iter: I, dupsort_func: &'a F) -> Self { Self { @@ -73,10 +87,11 @@ where impl<'a, I, F> Iterator for DupsortFuncIter<'a, I, F> where - I: Iterator, - F: Fn(&Entry, &Entry) -> Ordering, + I: Iter, + I::Item: PartialEq, + F: Fn(&I::Item, &I::Item) -> Ordering, { - type Item = Entry; + type Item = I::Item; fn next(&mut self) -> Option { self.run.pop().or_else(|| { @@ -85,7 +100,7 @@ where // println!("2: {:?} / {:?}", self.next, self.run); while let Some(e) = self.iter.next() { - if e.key() == self.run[0].key() { + if e == self.run[0] { self.run.push(e); continue; } @@ -103,7 +118,8 @@ where impl<'a, I, F> Iter for DupsortFuncIter<'a, I, F> where I: Iter, - F: Fn(&Entry, &Entry) -> Ordering, + I::Item: PartialEq, + F: Fn(&I::Item, &I::Item) -> Ordering, { fn seek(&mut self, key: &[u8]) { self.run.clear(); @@ -115,6 +131,7 @@ where #[test] fn test_dupsort() { use crate::source::test_source::TestSource; + use crate::Entry; let ts = TestSource( (1u8..10) diff --git a/src/entry.rs b/src/entry.rs index 491a294..9fe4fc6 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -1,6 +1,7 @@ +use std::cmp::Ordering; use std::sync::Arc; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Eq)] pub struct Entry { key: Arc>, value: Arc>, @@ -44,3 +45,31 @@ impl Entry { Arc::make_mut(&mut self.value) } } + +impl PartialOrd<[u8]> for Entry { + fn partial_cmp(&self, other: &[u8]) -> Option { + Some(self.key().cmp(other)) + } +} + +impl PartialEq<[u8]> for Entry { + fn eq(&self, other: &[u8]) -> bool { + self.key() == other + } +} + +impl PartialEq for Entry { + fn eq(&self, other: &Entry) -> bool { + self.key() == other.key() + } +} + +pub trait HasPrefix { + fn has_prefix(&self, prefix: &[u8]) -> bool; +} + +impl HasPrefix for Entry { + fn has_prefix(&self, prefix: &[u8]) -> bool { + self.key().starts_with(prefix) + } +} diff --git a/src/filter.rs b/src/filter.rs index ddcdac2..eb31ee1 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,4 +1,4 @@ -use crate::{Entry, Iter, Source}; +use crate::{entry::HasPrefix, Iter, Source}; pub struct FilterIter<'a, I, F> { iter: I, @@ -7,7 +7,8 @@ pub struct FilterIter<'a, I, F> { impl<'a, I, F> FilterIter<'a, I, F> where - F: Fn(&Entry, &mut dyn Iter) -> bool, + I: Iter, + F: Fn(&I::Item, &mut dyn Iter) -> bool, { pub fn new(iter: I, filter_func: &'a F) -> Self { Self { iter, filter_func } @@ -16,10 +17,10 @@ where impl<'a, I, F> Iterator for FilterIter<'a, I, F> where - F: Fn(&Entry, &mut dyn Iter) -> bool, I: Iter, + F: Fn(&I::Item, &mut dyn Iter) -> bool, { - type Item = Entry; + type Item = I::Item; fn next(&mut self) -> Option { while let Some(e) = self.iter.next() { @@ -34,7 +35,7 @@ where impl<'a, I, F> Iter for FilterIter<'a, I, F> where I: Iter, - F: Fn(&Entry, &mut dyn Iter) -> bool, + F: Fn(&I::Item, &mut dyn Iter) -> bool, { fn seek(&mut self, key: &[u8]) { self.iter.seek(key); @@ -49,7 +50,7 @@ pub struct FilterSource { impl FilterSource where S: Source, - F: Fn(&Entry, &mut dyn Iter) -> bool, + F: Fn(&S::Item, &mut dyn Iter) -> bool, { pub fn new(source: S, filter_func: F) -> Self { Self { @@ -62,23 +63,33 @@ where impl Source for FilterSource where S: Source, - F: Fn(&Entry, &mut dyn Iter) -> bool, + F: Fn(&S::Item, &mut dyn Iter) -> bool, { - fn iter(&self) -> impl Iter { + type Item = S::Item; + fn iter(&self) -> impl Iter { self.source.iter().filter_func(&self.filter_func) } - fn get(&self, key: &[u8]) -> impl Iter { + fn get(&self, key: &[u8]) -> impl Iter + where + Self::Item: PartialOrd<[u8]>, + { self.source.get(key).filter_func(&self.filter_func) } - fn get_prefix(&self, prefix: &[u8]) -> impl Iter { + fn get_prefix(&self, prefix: &[u8]) -> impl Iter + where + Self::Item: HasPrefix, + { self.source .get_prefix(prefix) .filter_func(&self.filter_func) } - fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { + fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter + where + Self::Item: PartialOrd<[u8]>, + { self.source .get_range(start, end) .filter_func(&self.filter_func) @@ -88,6 +99,7 @@ where #[test] fn test_filter() { use crate::source::test_source::TestSource; + use crate::Entry; let ts = TestSource( (0u8..10) diff --git a/src/iter.rs b/src/iter.rs index eeaecec..d47fa06 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,11 +1,12 @@ use crate::dupsort_func::DupsortFuncIter; +use crate::entry::HasPrefix; use crate::filter::FilterIter; use crate::merge_func::MergeFuncIter; use crate::Entry; use std::cmp::Ordering; use std::iter::Iterator; -pub trait Iter: Iterator { +pub trait Iter: Iterator { fn seek(&mut self, key: &[u8]); fn merge_func<'a, F>(self, merge_func: &'a F) -> MergeFuncIter<'a, Self, F> @@ -18,7 +19,7 @@ pub trait Iter: Iterator { fn dupsort_func<'a, F>(self, dupsort_func: &'a F) -> DupsortFuncIter<'a, Self, F> where - F: Fn(&Entry, &Entry) -> Ordering, + F: Fn(&Self::Item, &Self::Item) -> Ordering, Self: Sized, { DupsortFuncIter::new(self, dupsort_func) @@ -26,27 +27,23 @@ pub trait Iter: Iterator { fn filter_func<'a, F>(self, filter_func: &'a F) -> FilterIter<'a, Self, F> where - F: Fn(&Entry, &mut dyn Iter) -> bool, + F: Fn(&Self::Item, &mut dyn Iter) -> bool, Self: Sized, { FilterIter::new(self, filter_func) } } -pub type BoxedIter<'a> = Box; - -impl<'a> Iter for BoxedIter<'a> { - fn seek(&mut self, key: &[u8]) { - self.as_mut().seek(key); - } -} - -pub struct PrefixIter { +pub struct PrefixIter { iter: I, prefix: Vec, } -impl PrefixIter { +impl PrefixIter +where + I: Iter, + E: HasPrefix, +{ pub fn new(mut iter: I, prefix: impl AsRef<[u8]>) -> Self { iter.seek(prefix.as_ref()); Self { @@ -56,11 +53,15 @@ impl PrefixIter { } } -impl Iterator for PrefixIter { - type Item = Entry; +impl Iterator for PrefixIter +where + I: Iter, + E: HasPrefix, +{ + type Item = E; fn next(&mut self) -> Option { let item = self.iter.next()?; - if item.key().starts_with(self.prefix.as_slice()) { + if item.has_prefix(self.prefix.as_slice()) { Some(item) } else { None @@ -68,13 +69,17 @@ impl Iterator for PrefixIter { } } -impl Iter for PrefixIter { +impl Iter for PrefixIter +where + I: Iter, + E: HasPrefix, +{ fn seek(&mut self, key: &[u8]) { self.iter.seek(key); } } -pub struct RangeIter { +pub struct RangeIter { iter: I, start: Vec, end: Vec, @@ -91,11 +96,15 @@ impl RangeIter { } } -impl Iterator for RangeIter { - type Item = Entry; +impl Iterator for RangeIter +where + I: Iter, + E: PartialOrd<[u8]>, +{ + type Item = E; fn next(&mut self) -> Option { let item = self.iter.next()?; - if item.key() <= self.end.as_slice() { + if item <= *self.end.as_slice() { Some(item) } else { None @@ -103,7 +112,11 @@ impl Iterator for RangeIter { } } -impl Iter for RangeIter { +impl Iter for RangeIter +where + I: Iter, + E: PartialOrd<[u8]>, +{ fn seek(&mut self, key: &[u8]) { if key <= self.start.as_slice() { self.iter.seek(self.start.as_slice()); diff --git a/src/merge_func.rs b/src/merge_func.rs index b8d847f..c0800ea 100644 --- a/src/merge_func.rs +++ b/src/merge_func.rs @@ -17,19 +17,20 @@ where impl Source for MergeFunc where - S: Source, + S: Source, F: Fn(&mut Vec, &Entry), { - fn iter(&self) -> impl Iter { + type Item = Entry; + fn iter(&self) -> impl Iter { self.source.iter().merge_func(&self.merge_func) } - fn get(&self, key: &[u8]) -> impl Iter { + fn get(&self, key: &[u8]) -> impl Iter { self.source.get(key).merge_func(&self.merge_func) } - fn get_prefix(&self, prefix: &[u8]) -> impl Iter { + fn get_prefix(&self, prefix: &[u8]) -> impl Iter { self.source.get_prefix(prefix).merge_func(&self.merge_func) } - fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { + fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { self.source .get_range(start, end) .merge_func(&self.merge_func) @@ -77,7 +78,7 @@ where impl<'a, I, F> Iter for MergeFuncIter<'a, I, F> where - I: Iter, + I: Iter, F: Fn(&mut Vec, &Entry), { fn seek(&mut self, key: &[u8]) { diff --git a/src/merger.rs b/src/merger.rs index 558a672..8d54303 100644 --- a/src/merger.rs +++ b/src/merger.rs @@ -2,13 +2,12 @@ use crate::{Entry, Iter, Source}; use std::cmp::Ordering; use std::collections::BinaryHeap; -pub struct Merger { +pub struct Merger { sources: Vec, } impl From for Merger where - S: Source, I: IntoIterator, { fn from(i: I) -> Self { @@ -51,7 +50,7 @@ pub struct MergeIter { impl From for MergeIter where I: Iterator, - I::Item: Iter, + I::Item: Iter, { fn from(iters: I) -> Self { let mut v: Vec = Vec::new(); @@ -70,7 +69,7 @@ where } } -impl Iterator for MergeIter { +impl> Iterator for MergeIter { type Item = Entry; fn next(&mut self) -> Option { @@ -90,7 +89,7 @@ impl Iterator for MergeIter { } } -impl Iter for MergeIter { +impl> Iter for MergeIter { fn seek(&mut self, key: &[u8]) { if key > self.last_key.as_slice() { loop { @@ -135,20 +134,21 @@ impl Iter for MergeIter { } } -impl Source for Merger { - fn iter(&self) -> impl Iter { +impl> Source for Merger { + type Item = Entry; + fn iter(&self) -> impl Iter { MergeIter::from(self.sources.iter().map(|s| s.iter())) } - fn get(&self, key: &[u8]) -> impl Iter { + fn get(&self, key: &[u8]) -> impl Iter { MergeIter::from(self.sources.iter().map(|s| s.get(key))) } - fn get_prefix(&self, prefix: &[u8]) -> impl Iter { + fn get_prefix(&self, prefix: &[u8]) -> impl Iter { MergeIter::from(self.sources.iter().map(|s| s.get_prefix(prefix))) } - fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { + fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { MergeIter::from(self.sources.iter().map(|s| s.get_range(start, end))) } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index c79d3f4..8a1568c 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -177,7 +177,9 @@ impl> Iter for ReaderIter { } impl> Source for Reader { - fn iter(&self) -> impl Iter { + type Item = Entry; + + fn iter(&self) -> impl Iter { ReaderIter { reader: self.clone(), next_offset: 0, diff --git a/src/sorter.rs b/src/sorter.rs index 232bbfc..7685530 100644 --- a/src/sorter.rs +++ b/src/sorter.rs @@ -35,7 +35,7 @@ where self.batch_size += esize; } - pub fn source(mut self) -> impl Source { + pub fn source(mut self) -> impl Source { if self.batch.get_mut().len() > 0 { self.write_chunk(); } diff --git a/src/source.rs b/src/source.rs index 6d590b5..55e0317 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,18 +1,30 @@ use crate::dupsort_func::DupsortFunc; +use crate::entry::HasPrefix; use crate::filter::FilterSource; use crate::iter::{PrefixIter, RangeIter}; use crate::merge_func::MergeFunc; use crate::{Entry, Iter}; pub trait Source { - fn iter(&self) -> impl Iter; - fn get(&self, key: &[u8]) -> impl Iter { + type Item; + + fn iter(&self) -> impl Iter; + fn get(&self, key: &[u8]) -> impl Iter + where + Self::Item: PartialOrd<[u8]>, + { RangeIter::new(self.iter(), key, key) } - fn get_prefix(&self, prefix: &[u8]) -> impl Iter { + fn get_prefix(&self, prefix: &[u8]) -> impl Iter + where + Self::Item: HasPrefix, + { PrefixIter::new(self.iter(), prefix) } - fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter { + fn get_range(&self, start: &[u8], end: &[u8]) -> impl Iter + where + Self::Item: PartialOrd<[u8]>, + { RangeIter::new(self.iter(), start, end) } @@ -27,7 +39,7 @@ pub trait Source { fn dupsort_func(self, dupsort_func: F) -> DupsortFunc where Self: Sized, - F: Fn(&Entry, &Entry) -> std::cmp::Ordering, + F: Fn(&Self::Item, &Self::Item) -> std::cmp::Ordering, { DupsortFunc::new(self, dupsort_func) } @@ -35,7 +47,7 @@ pub trait Source { fn filter(self, filter_func: F) -> FilterSource where Self: Sized, - F: Fn(&Entry, &mut dyn Iter) -> bool, + F: Fn(&Self::Item, &mut dyn Iter) -> bool, { FilterSource::new(self, filter_func) } @@ -76,7 +88,8 @@ impl<'a> Iter for VecIter<'a> { } impl Source for Vec { - fn iter(&self) -> impl Iter { + type Item = Entry; + fn iter(&self) -> impl Iter { VecIter { index: 0, vec: self, @@ -121,7 +134,8 @@ pub mod test_source { } impl Source for TestSource { - fn iter(&self) -> impl Iter { + type Item = Entry; + fn iter(&self) -> impl Iter { TestIter { source: self, off: 0, -- 2.50.1