Skip to content

Commit a4315c7

Browse files
committed
Add Rabin chunker
The program now supports content-defined chunking via a Rabin chunker implementation.
1 parent 82e600c commit a4315c7

File tree

8 files changed

+351
-67
lines changed

8 files changed

+351
-67
lines changed

src/chunker/chunker.rs

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1-
use crate::chunker::dynamicChunker::DynamicChunker;
21
use crate::chunker::fileChunker::FileChunker;
2+
use crate::chunker::rabinChunker::RabinChunker;
33
use crate::chunker::staticChunker::StaticChunker;
4-
use std::slice::Chunks;
54

65
use clap::ValueEnum;
76
use memmap2::Mmap;
87

8+
#[derive(Debug, Clone, Copy)]
9+
pub enum ChunkingScheme {
10+
FILE,
11+
STATIC,
12+
CONTENT,
13+
}
14+
915
#[derive(ValueEnum, Clone, Copy, Debug)]
1016
pub enum ChunkerType {
1117
FILE,
@@ -45,42 +51,46 @@ impl ChunkerType {
4551
ChunkerType::CDC64K => 1 << 16,
4652
}
4753
}
48-
}
49-
50-
pub trait Chunker {
51-
fn chunk<'a>(&self, mmap: &'a Mmap) -> Chunks<'a, u8>;
52-
}
53-
54-
pub struct ChunkFactory {
55-
t: ChunkerType,
56-
s: Option<Box<str>>,
57-
}
58-
59-
impl ChunkFactory {
60-
pub fn new(chunkerType: ChunkerType, salt: Option<Box<str>>) -> Self {
61-
Self {
62-
t: chunkerType,
63-
s: salt,
64-
}
65-
}
6654

67-
pub fn createChunker(&self) -> Box<dyn Chunker> {
68-
match self.t {
69-
ChunkerType::FILE => Box::new(FileChunker::new()),
55+
pub fn getScheme(&self) -> ChunkingScheme {
56+
match self {
57+
ChunkerType::FILE => ChunkingScheme::FILE,
7058
ChunkerType::SC1K
7159
| ChunkerType::SC2K
7260
| ChunkerType::SC4K
7361
| ChunkerType::SC8K
7462
| ChunkerType::SC16K
7563
| ChunkerType::SC32K
76-
| ChunkerType::SC64K => Box::new(StaticChunker::new(ChunkerType::getSize(&self.t))),
64+
| ChunkerType::SC64K => ChunkingScheme::STATIC,
7765
ChunkerType::CDC1K
7866
| ChunkerType::CDC2K
7967
| ChunkerType::CDC4K
8068
| ChunkerType::CDC8K
8169
| ChunkerType::CDC16K
8270
| ChunkerType::CDC32K
83-
| ChunkerType::CDC64K => Box::new(DynamicChunker::new(ChunkerType::getSize(&self.t))),
71+
| ChunkerType::CDC64K => ChunkingScheme::CONTENT,
72+
}
73+
}
74+
}
75+
76+
pub trait Chunker {
77+
fn chunk<'a>(&self, mmap: &'a Mmap) -> Box<dyn Iterator<Item = &'a [u8]> + 'a>;
78+
}
79+
80+
pub struct ChunkFactory {
81+
t: ChunkerType,
82+
}
83+
84+
impl ChunkFactory {
85+
pub fn new(chunkerType: ChunkerType) -> Self {
86+
Self { t: chunkerType }
87+
}
88+
89+
pub fn createChunker(&self) -> Box<dyn Chunker> {
90+
match self.t.getScheme() {
91+
ChunkingScheme::FILE => Box::new(FileChunker::new()),
92+
ChunkingScheme::STATIC => Box::new(StaticChunker::new(self.t.getSize())),
93+
ChunkingScheme::CONTENT => Box::new(RabinChunker::<64>::new(self.t.getSize())),
8494
}
8595
}
8696
}

src/chunker/dynamicChunker.rs

Lines changed: 0 additions & 20 deletions
This file was deleted.

src/chunker/fileChunker.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use crate::chunker::chunker::Chunker;
22
use memmap2::Mmap;
3-
use std::slice::Chunks;
43

54
pub struct FileChunker {}
65

@@ -11,8 +10,9 @@ impl FileChunker {
1110
}
1211

1312
impl Chunker for FileChunker {
14-
fn chunk<'a>(&self, mmap: &'a Mmap) -> Chunks<'a, u8> {
15-
(&mmap[..]).chunks(mmap.len())
13+
fn chunk<'a>(&self, mmap: &'a Mmap) -> Box<dyn Iterator<Item = &'a [u8]> + 'a> {
14+
let fileSize = mmap.len();
15+
Box::new(mmap.chunks(fileSize))
1616
}
1717
}
1818

@@ -49,8 +49,9 @@ mod tests {
4949
fn testFileChunkerFactory() {
5050
let (data, mmap) = generateTestData();
5151

52-
let factory = ChunkFactory::new(ChunkerType::FILE, None);
52+
let factory = ChunkFactory::new(ChunkerType::FILE);
5353
let chunker = factory.createChunker();
54+
5455
let mut chunks = chunker.chunk(&mmap);
5556

5657
assert_eq!(chunks.next(), Some(data.as_bytes()));

src/chunker/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
pub mod chunker;
2-
pub mod dynamicChunker;
32
pub mod fileChunker;
3+
pub mod rabinChunker;
44
pub mod staticChunker;

0 commit comments

Comments
 (0)