+++ /dev/null
-use std::cmp::Ordering;
-pub use std::rc::Rc;
-
-#[derive(Debug, Clone)]
-pub struct Entry {
- pub key: Rc<Vec<u8>>,
- pub value: Rc<Vec<u8>>,
-}
-
-impl Entry {
- pub fn new() -> Entry {
- Entry {
- key: Rc::new(Vec::new()),
- value: Rc::new(Vec::new()),
- }
- }
- pub fn from_key_value(k: impl AsRef<[u8]>, v: impl AsRef<[u8]>) -> Entry {
- let mut key = Vec::<u8>::with_capacity(k.as_ref().len());
- let mut value = Vec::<u8>::with_capacity(v.as_ref().len());
-
- key.extend_from_slice(k.as_ref());
- value.extend_from_slice(v.as_ref());
-
- Entry {
- key: Rc::new(key),
- value: Rc::new(value),
- }
- }
-
- pub fn unpack(&self) -> (&[u8], &[u8]) {
- (self.key.as_slice(), self.value.as_slice())
- }
-}
-
-// Entries are ordered and compared by key only
-impl PartialOrd for Entry {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- self.key.partial_cmp(&other.key)
- }
-}
-
-impl PartialEq for Entry {
- fn eq(&self, other: &Self) -> bool {
- self.key == other.key
- }
-}
-
-#[cfg(test)]
-mod test {
-
- use crate::*;
-
- struct TestIter(Entry);
-
- impl TestIter {
- fn new() -> TestIter {
- TestIter(Entry::new())
- }
- }
-
- impl Iterator for TestIter {
- type Item = Entry;
-
- fn next(&mut self) -> Option<Entry> {
- println!("key strong_count = {}", Rc::strong_count(&self.0.key));
- Rc::make_mut(&mut self.0.key).push(0);
- Some(self.0.clone())
- }
- }
-
- #[test]
- fn test_iter() {
- for e in TestIter::new() {
- if e.key.len() > 3 {
- break;
- }
- }
- }
-
- #[test]
- fn test_iter_filter() {
- for e in TestIter::new().filter(|x: &Entry| x.key.len() % 2 == 0) {
- println!("{:?}", e);
- if e.key.len() > 6 {
- break;
- }
- }
- }
-}
+++ /dev/null
-use crate::entry::Entry;
-use std::cell::RefCell;
-use std::iter::Iterator;
-use std::rc::Rc;
-
-pub trait Entries: Sized {
- fn seek(&mut self, key: impl AsRef<[u8]>);
-
- fn iter_next(&mut self) -> Option<Entry>;
-
- fn wrap_iter<F: FnOnce(Iter<IterCell<Self>>) -> O, O: Iterator<Item = Entry>>(
- self,
- f: F,
- ) -> Seekable<WrapIter<Self, O>> {
- let b = IterCell::new(self);
- let it = f(b.clone().into_iter());
- Seekable(WrapIter {
- inner: b,
- outer: it,
- })
- }
-
- fn filter<F>(self, filter: F) -> Seekable<Filter<Self, F>>
- where
- F: FnMut(&Entry, &mut Vec<u8>) -> FilterAction,
- {
- Seekable(Filter {
- inner: self,
- filter_func: filter,
- seek_key: Vec::new(),
- })
- }
-}
-
-pub struct Seekable<T: Entries>(T);
-
-impl<T: Entries> IntoIterator for Seekable<T> {
- type Item = Entry;
- type IntoIter = Iter<T>;
-
- fn into_iter(self) -> Self::IntoIter {
- Iter(self.0)
- }
-}
-
-impl<T: Entries> Entries for Seekable<T> {
- fn seek(&mut self, key: impl AsRef<[u8]>) {
- self.0.seek(key)
- }
-
- fn iter_next(&mut self) -> Option<Entry> {
- self.0.iter_next()
- }
-}
-
-pub struct Iter<T: Entries>(T);
-
-impl<T: Entries> Iterator for Iter<T> {
- type Item = Entry;
-
- fn next(&mut self) -> Option<Entry> {
- self.0.iter_next()
- }
-}
-
-// WrapIter
-pub struct IterCell<I: Entries> {
- ic: Rc<RefCell<I>>,
-}
-
-impl<I: Entries> Clone for IterCell<I> {
- fn clone(&self) -> Self {
- IterCell {
- ic: self.ic.clone(),
- }
- }
-}
-
-impl<I: Entries> IterCell<I> {
- fn new(i: I) -> IterCell<I> {
- IterCell {
- ic: Rc::new(RefCell::new(i)),
- }
- }
-}
-
-impl<I: Entries> IntoIterator for IterCell<I> {
- type Item = Entry;
- type IntoIter = Iter<IterCell<I>>;
-
- fn into_iter(self) -> Self::IntoIter {
- Iter(self)
- }
-}
-
-impl<I: Entries> Entries for IterCell<I> {
- fn seek(&mut self, key: impl AsRef<[u8]>) {
- self.ic.borrow_mut().seek(key);
- }
-
- fn iter_next(&mut self) -> Option<Entry> {
- self.ic.borrow_mut().iter_next()
- }
-}
-
-pub struct WrapIter<E: Entries, O: Iterator<Item = Entry>> {
- inner: IterCell<E>,
- outer: O,
-}
-
-impl<I: Entries, O: Iterator<Item = Entry>> Entries for WrapIter<I, O> {
- fn seek(&mut self, key: impl AsRef<[u8]>) {
- self.inner.seek(key)
- }
-
- fn iter_next(&mut self) -> Option<Entry> {
- self.outer.next()
- }
-}
-
-// Filter
-
-pub enum FilterAction {
- Keep,
- Skip,
- Seek,
-}
-pub use FilterAction::*;
-
-pub struct Filter<I: Entries, F: FnMut(&Entry, &mut Vec<u8>) -> FilterAction> {
- inner: I,
- filter_func: F,
- seek_key: Vec<u8>,
-}
-
-impl<I, F> Entries for Filter<I, F>
-where
- I: Entries,
- F: FnMut(&Entry, &mut Vec<u8>) -> FilterAction,
-{
- fn seek(&mut self, key: impl AsRef<[u8]>) {
- self.inner.seek(key.as_ref());
- }
-
- fn iter_next(&mut self) -> Option<Entry> {
- self.seek_key.clear();
- while let Some(e) = self.inner.iter_next() {
- match (self.filter_func)(&e, &mut self.seek_key) {
- Skip => continue,
- Keep => return Some(e),
- Seek => self.inner.seek(self.seek_key.as_slice()),
- }
- }
- None
- }
-}
-
-#[cfg(test)]
-mod test {
- use super::*;
-
- struct TestIter(u8);
-
- impl Entries for TestIter {
- fn iter_next(&mut self) -> Option<Entry> {
- match self.0 {
- 255 => None,
- _ => {
- let res = Entry::from_key_value(vec![self.0], vec![self.0]);
- self.0 = self.0 + 1;
- Some(res)
- }
- }
- }
-
- fn seek(&mut self, kr: impl AsRef<[u8]>) {
- self.0 = kr.as_ref()[0];
- }
- }
-
- #[test]
- fn test_iter_filter() {
- let ti = TestIter(0);
- let v: Vec<Entry> = ti
- .filter(|e, k| {
- let b = e.key[0];
- if b % 2 > 0 {
- if b < 60 {
- k.push((b + 1) * 4);
- return Seek;
- }
- return Skip;
- }
- Keep
- })
- .into_iter()
- .filter(|e| e.key[0] % 5 == 0)
- .collect();
- println!("\n\nTEST: iter_filter");
- println!("length = {}", v.len());
- assert!(v.into_iter().all(|e| e.key[0] % 2 == 0));
- }
-
- #[test]
- fn test_iter_wrap() {
- let ti = TestIter(0);
- let mut mi = ti.wrap_iter(|i| i.filter(|e| e.key[0] % 2 == 0));
- mi.seek(&[100]);
- let v: Vec<Entry> = mi.into_iter().collect();
- println!("\n\nTEST: iter_wrap");
- println!("{:?}", v);
- }
-}
-pub mod entry;
-pub mod iter;
-
-pub use entry::*;
-pub use iter::*;
+pub mod seekable;
+pub use seekable::Seekable;
--- /dev/null
+mod filter_map;
+mod merge;
+pub use filter_map::FilterMap;
+pub use merge::Merger;
+
+/// A stream of key/value entries that can be repositioned by key.
+///
+/// Implementors supply [`next`](Seekable::next) and [`seek`](Seekable::seek);
+/// the provided methods build adaptors on top of those two.
+pub trait Seekable: Sized {
+    /// Key type. `Merger` uses its `Ord` implementation to pick the next entry.
+    type Key: Ord;
+    /// Payload returned alongside each key.
+    type Value;
+
+    /// Returns the next entry, or `None` when the source has nothing more to
+    /// yield from its current position.
+    fn next(&mut self) -> Option<(Self::Key, Self::Value)>;
+
+    /// Repositions the source relative to `key`.
+    ///
+    /// NOTE(review): `Merger`'s forward seek loops until the entry returned
+    /// after `seek(key)` has a key `>= key`, so implementors presumably must
+    /// position at the first such entry -- confirm and document the contract.
+    fn seek(&mut self, key: &Self::Key);
+
+    /// Wraps `self` in a [`FilterMap`] driven by `func`.
+    ///
+    /// `func` receives each entry together with a mutable reference to the
+    /// wrapped source; entries for which it returns `None` are skipped. The
+    /// `AsRef` bound lets the adaptor turn a mapped key back into a source
+    /// key when it is asked to seek.
+    fn filter_map<F, K, V>(self, func: F) -> FilterMap<Self, F, K, V>
+    where
+        K: Ord + AsRef<Self::Key>,
+        F: FnMut((Self::Key, Self::Value), &mut Self) -> Option<(K, V)>,
+    {
+        FilterMap { next: self, func }
+    }
+
+    /// Combines every source produced by `iter` into a single [`Merger`].
+    ///
+    /// Any `IntoIterator` is accepted, so a collection of sources can be
+    /// passed directly as well as an explicit iterator.
+    fn merge<L>(iter: L) -> Merger<Self>
+    where
+        L: IntoIterator<Item = Self>,
+    {
+        merge::merge(iter.into_iter())
+    }
+}
+
+/// Adapter that exposes a [`Seekable`] through the standard [`Iterator`]
+/// trait.
+#[derive(Debug)]
+pub struct Iter<T>(T);
+
+impl<T: Seekable> Iterator for Iter<T> {
+    type Item = (T::Key, T::Value);
+
+    /// Forwards to the wrapped value's `Seekable::next`.
+    fn next(&mut self) -> Option<(T::Key, T::Value)> {
+        Seekable::next(&mut self.0)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{Iter, Seekable};
+
+    /// Smallest possible `Seekable`: it never yields an entry and ignores
+    /// seeks.
+    struct Empty;
+
+    impl IntoIterator for Empty {
+        type Item = ((), ());
+        type IntoIter = Iter<Empty>;
+        fn into_iter(self) -> Iter<Empty> {
+            Iter(self)
+        }
+    }
+
+    impl Seekable for Empty {
+        type Value = ();
+        type Key = ();
+
+        fn next(&mut self) -> Option<((), ())> {
+            None
+        }
+        fn seek(&mut self, _key: &Self::Key) {}
+    }
+
+    // Exercises the fixture so the trait plumbing and the `Iter` adapter are
+    // actually compiled and run.
+    #[test]
+    fn test_empty_yields_nothing() {
+        let mut source = Empty;
+        source.seek(&());
+        assert_eq!(Seekable::next(&mut source), None);
+        assert_eq!(Empty.into_iter().count(), 0);
+    }
+}
--- /dev/null
+use crate::seekable::{Iter, Seekable};
+
+/// Adaptor built by `Seekable::filter_map`: a wrapped source plus the closure
+/// applied to each of its entries.
+pub struct FilterMap<S, F, K, V>
+where
+    S: Seekable,
+    F: FnMut((S::Key, S::Value), &mut S) -> Option<(K, V)>,
+    K: Ord + AsRef<S::Key>,
+{
+    /// Wrapped source that entries are pulled from.
+    pub(crate) next: S,
+    /// Closure that maps an entry to an output pair or rejects it with `None`.
+    pub(crate) func: F,
+}
+
+/// Lets a `FilterMap` be consumed with `for` loops and iterator adaptors.
+impl<S, F, K, V> IntoIterator for FilterMap<S, F, K, V>
+where
+    S: Seekable,
+    F: FnMut((S::Key, S::Value), &mut S) -> Option<(K, V)>,
+    K: Ord + AsRef<S::Key>,
+{
+    type Item = (K, V);
+    type IntoIter = Iter<Self>;
+
+    /// Wraps the adaptor in [`Iter`]; no entry is read until iteration starts.
+    fn into_iter(self) -> Self::IntoIter {
+        Iter(self)
+    }
+}
+
+impl<S, F, K, V> Seekable for FilterMap<S, F, K, V>
+where
+    S: Seekable,
+    F: FnMut((S::Key, S::Value), &mut S) -> Option<(K, V)>,
+    K: Ord + AsRef<S::Key>,
+{
+    type Key = K;
+    type Value = V;
+
+    /// Pulls entries from the wrapped source until the closure accepts one.
+    ///
+    /// Returns `None` as soon as the wrapped source does.
+    fn next(&mut self) -> Option<(K, V)> {
+        loop {
+            let entry = self.next.next()?;
+            // The closure also gets the source itself, so it can reposition
+            // it before the next entry is pulled.
+            if let Some(mapped) = (self.func)(entry, &mut self.next) {
+                return Some(mapped);
+            }
+        }
+    }
+
+    /// Seeks the wrapped source to the source key that `key` refers to.
+    fn seek(&mut self, key: &K) {
+        let target: &S::Key = key.as_ref();
+        self.next.seek(target)
+    }
+}
--- /dev/null
+use crate::seekable::{Iter, Seekable};
+use std::cell::RefCell;
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+
+/// Merges several [`Seekable`] sources into one stream ordered by key.
+pub struct Merger<S: Seekable> {
+    /// Sources that currently hold a buffered entry, kept in a heap whose top
+    /// is the entry with the smallest key.
+    active: BinaryHeap<MergerEnt<S>>,
+    /// Sources whose last `next()` returned `None`; retained so that a heap
+    /// reset can seek them again.
+    ended: Vec<S>,
+}
+
+/// One heap slot: a source together with the entry most recently read from it.
+struct MergerEnt<S>
+where
+    S: Seekable,
+{
+    /// Entry already pulled from `rest` but not yet handed to the caller.
+    cur_item: RefCell<(S::Key, S::Value)>,
+    /// The source positioned just after `cur_item`.
+    rest: S,
+}
+
+// Heap slots compare by the key of their buffered entry only; the value and
+// the remaining source do not take part.
+impl<S: Seekable> PartialEq for MergerEnt<S> {
+    fn eq(&self, other: &Self) -> bool {
+        self.cur_item.borrow().0 == other.cur_item.borrow().0
+    }
+}
+impl<S: Seekable> Eq for MergerEnt<S> {}
+
+// std::collections::BinaryHeap is a max-heap and the merger needs a
+// min-heap, so the ordering of MergerEnt<S> is the reverse of the ordering
+// of S::Key. The reversal is written once, in the `Ord` impl, instead of
+// wrapping entries in std::cmp::Reverse, which would add packing/unpacking
+// boilerplate. `partial_cmp` delegates to `cmp` so the two can never
+// disagree.
+impl<S: Seekable> PartialOrd for MergerEnt<S> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<S: Seekable> Ord for MergerEnt<S> {
+    /// Compares buffered keys with the operands swapped, so the entry with
+    /// the smallest key is the greatest `MergerEnt`.
+    fn cmp(&self, other: &Self) -> Ordering {
+        other.cur_item.borrow().0.cmp(&self.cur_item.borrow().0)
+    }
+}
+
+impl<S: Seekable> MergerEnt<S> {
+    /// Reads the first entry of `source` and pairs it with the source.
+    ///
+    /// When `source` yields nothing it is handed back as `Err` instead of
+    /// being dropped, so the caller can keep it for a later seek.
+    fn new(mut source: S) -> Result<MergerEnt<S>, S> {
+        match source.next() {
+            Some(first) => Ok(MergerEnt {
+                cur_item: RefCell::new(first),
+                rest: source,
+            }),
+            None => Err(source),
+        }
+    }
+
+    /// Stores `v` as the buffered entry and returns the previous one.
+    fn replace(&mut self, v: (S::Key, S::Value)) -> (S::Key, S::Value) {
+        // `&mut self` guarantees exclusive access, so no runtime borrow
+        // check is needed.
+        std::mem::replace(self.cur_item.get_mut(), v)
+    }
+}
+
+/// Builds a [`Merger`] over `sources`.
+///
+/// One entry is read from every source up front. Sources that yield an entry
+/// go into the heap; sources that yield nothing are kept in `ended` rather
+/// than discarded, so a heap reset triggered by a later seek can still
+/// reposition them.
+pub fn merge<L, S>(sources: L) -> Merger<S>
+where
+    L: Iterator<Item = S>,
+    S: Seekable,
+{
+    let mut active = Vec::new();
+    let mut ended = Vec::new();
+    for source in sources {
+        match MergerEnt::new(source) {
+            Ok(ent) => active.push(ent),
+            Err(exhausted) => ended.push(exhausted),
+        }
+    }
+    Merger {
+        // Heapify once instead of pushing entry by entry.
+        active: BinaryHeap::from(active),
+        ended,
+    }
+}
+
+/// Lets a `Merger` be consumed with `for` loops and iterator adaptors.
+impl<S: Seekable> IntoIterator for Merger<S> {
+    type Item = (S::Key, S::Value);
+    type IntoIter = Iter<Self>;
+
+    /// Wraps the merger in [`Iter`], which forwards to `Seekable::next`.
+    fn into_iter(self) -> Iter<Self> {
+        Iter(self)
+    }
+}
+
+// Rebuilds the heap from scratch: every source, active or ended, is seeked to
+// `key` and asked for one entry. Sources that yield an entry become active;
+// the rest go back to `ended`. Buffered entries of the old heap are dropped.
+fn heap_reset<S: Seekable>(m: &mut Merger<S>, key: &S::Key) {
+    let mut sources = std::mem::take(&mut m.ended);
+    // The old heap order is irrelevant here, so drain it rather than paying
+    // for a sift on every pop.
+    sources.extend(m.active.drain().map(|ent| ent.rest));
+
+    let mut active = Vec::with_capacity(sources.len());
+    for mut source in sources {
+        source.seek(key);
+        match source.next() {
+            Some(next_item) => active.push(MergerEnt {
+                cur_item: RefCell::new(next_item),
+                rest: source,
+            }),
+            None => m.ended.push(source),
+        }
+    }
+
+    m.active = BinaryHeap::from(active);
+}
+
+// Forward seek within the heap: while the smallest buffered key is below
+// `key`, seek that source and refill its slot, or retire the source to
+// `ended` when it has nothing left. Always returns `None`; the `Option`
+// return type only exists so `?` can be used on an empty heap.
+fn heap_seek<S: Seekable>(m: &mut Merger<S>, key: &S::Key) -> Option<()> {
+    loop {
+        let mut head = m.active.peek_mut()?;
+        if head.cur_item.borrow().0 >= *key {
+            return None;
+        }
+        head.rest.seek(key);
+        match head.rest.next() {
+            Some(entry) => {
+                // The heap re-sifts this slot when `head` goes out of scope.
+                head.replace(entry);
+            }
+            None => {
+                drop(head); // release heap for modification
+                let spent = m.active.pop()?;
+                m.ended.push(spent.rest);
+            }
+        }
+    }
+}
+
+impl<S: Seekable> Seekable for Merger<S> {
+    type Key = S::Key;
+    type Value = S::Value;
+
+    /// Returns the buffered entry with the smallest key and refills its slot
+    /// from the same source.
+    ///
+    /// A source that cannot refill is moved to `ended`. Returns `None` once
+    /// no active source remains.
+    fn next(&mut self) -> Option<(Self::Key, Self::Value)> {
+        let mut head = self.active.peek_mut()?;
+        if let Some(refill) = head.rest.next() {
+            // Swap the refill in and hand out the previously buffered entry.
+            return Some(head.replace(refill));
+        }
+        drop(head); // release heap for mutation below
+        let spent = self.active.pop()?;
+        self.ended.push(spent.rest);
+        Some(spent.cur_item.into_inner())
+    }
+
+    /// Repositions the merged stream relative to `key`.
+    ///
+    /// If the smallest buffered key is already past `key`, or no source is
+    /// active at all, every source (including ended ones) is re-seeked via
+    /// `heap_reset`. Otherwise only the sources that lag behind `key` are
+    /// advanced via `heap_seek`.
+    fn seek(&mut self, key: &Self::Key) {
+        let must_reset = match self.active.peek() {
+            Some(head) => head.cur_item.borrow().0 > *key,
+            // Everything is exhausted; only a full reset can bring ended
+            // sources back.
+            None => true,
+        };
+        if must_reset {
+            heap_reset(self, key);
+        } else {
+            heap_seek(self, key);
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::seekable::{Iter, Seekable};
+    use std::cmp::Ordering;
+
+    /// Vector with a read cursor, used as a simple `Seekable` source.
+    /// `From<Vec<T>>` sorts the contents before use.
+    #[derive(Debug)]
+    struct SeekableVec<T: Ord + Copy> {
+        off: usize,
+        v: Vec<T>,
+    }
+
+    impl<T: Ord + Copy> IntoIterator for SeekableVec<T> {
+        type Item = (T, ());
+        type IntoIter = Iter<Self>;
+        fn into_iter(self) -> Self::IntoIter {
+            Iter(self)
+        }
+    }
+
+    impl<T: Ord + Copy> Seekable for SeekableVec<T> {
+        type Key = T;
+        type Value = ();
+
+        // Yields the element under the cursor and advances past it.
+        fn next(&mut self) -> Option<(T, ())> {
+            let key = *self.v.get(self.off)?;
+            self.off += 1;
+            Some((key, ()))
+        }
+
+        // Moves the cursor to the first element `>= key`, scanning from the
+        // start; lands one past the end when there is none.
+        fn seek(&mut self, key: &T) {
+            self.off = self
+                .v
+                .iter()
+                .position(|item| *item >= *key)
+                .unwrap_or(self.v.len());
+        }
+    }
+
+    impl<T: Ord + Copy> From<Vec<T>> for SeekableVec<T> {
+        fn from(mut vec: Vec<T>) -> SeekableVec<T> {
+            vec.sort();
+            SeekableVec { off: 0, v: vec }
+        }
+    }
+
+    #[test]
+    fn test_seekablevec() {
+        let input = vec![3, 1, 4, 1, 5, 9];
+        let mut expected = input.clone();
+        expected.sort();
+
+        let mut sv = SeekableVec::from(input);
+        sv.seek(&3);
+
+        let got = sv.into_iter().map(|(k, _)| k);
+        let want = expected.into_iter().filter(|k| *k >= 3);
+        assert_eq!(got.cmp(want), Ordering::Equal);
+    }
+
+    #[test]
+    fn test_merge_sv() {
+        let vpi = vec![3, 1, 4, 1, 5, 9];
+        let ve = vec![2, 7, 1, 8, 2, 8];
+
+        let sources = vec![
+            SeekableVec::from(vpi.clone()),
+            SeekableVec::from(ve.clone()),
+        ];
+        let mut mv = Seekable::merge(sources.into_iter());
+        mv.seek(&5);
+
+        let mut expected: Vec<_> = ve.into_iter().chain(vpi).collect();
+        expected.sort();
+
+        let got = mv.into_iter().map(|(k, _)| k);
+        let want = expected.into_iter().filter(|k| *k >= 5);
+        assert_eq!(got.cmp(want), Ordering::Equal);
+    }
+}
+++ /dev/null
-use mtbl;
-
-#[test]
-fn from_key_value() {
- let e = mtbl::Entry::from_key_value(vec![0u8, 1, 2, 3], vec![4u8, 5, 6, 7]);
- println!("{:?}", e);
- let (k, v) = e.unpack();
- println!("{:?}", (k, v));
-}