Skip to content

v0.12.0-alpha #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 32 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f962e3e
Add TODO
jzombie Jul 29, 2025
b20591e
Add additional TODO
jzombie Jul 29, 2025
1a82ba4
Add TODO
jzombie Jul 29, 2025
7f44e36
Add TODO
jzombie Jul 29, 2025
cce3a16
Add TODO
jzombie Jul 29, 2025
26b5927
Add TODO
jzombie Jul 30, 2025
9fa23a7
Add TODO
jzombie Aug 1, 2025
2099bc2
Update TODO
jzombie Aug 1, 2025
9793672
Centralize dependencies
jzombie Aug 8, 2025
b3c4630
Fix `error: hiding a lifetime that's elided elsewhere is confusing`
jzombie Aug 8, 2025
e238fae
Add comment
jzombie Aug 8, 2025
9d055d0
Preempt version `0.12.0-alpha`
jzombie Aug 9, 2025
1703dd6
Centralize additional deps
jzombie Aug 9, 2025
1eac14b
Remove TODO
jzombie Aug 9, 2025
cd0da0f
Remove TODO
jzombie Aug 9, 2025
5adb64f
Add TODOs
jzombie Aug 9, 2025
1281657
Rename `hashed_payloads` to `prehashed_keys`
jzombie Aug 9, 2025
4a2174d
Bump lock for local dev
jzombie Aug 9, 2025
e9b0306
Rename to `batch_write_with_key_hashes`
jzombie Aug 9, 2025
2313733
Add add'l writer trait methods
jzombie Aug 9, 2025
42413ff
Implement `batch_read_hashed_keys`
jzombie Aug 9, 2025
0ba9f0a
Implement `batch_delete`
jzombie Aug 9, 2025
3a02528
Update comments
jzombie Aug 9, 2025
a54b66b
Add `exists_with_key_hash` and `read_with_key_hash` methods
jzombie Aug 9, 2025
e9bebf5
Fix Clippy warnings
jzombie Aug 9, 2025
b120aa4
Fix additional Clippy warning
jzombie Aug 9, 2025
378d9cb
Add TODO
jzombie Aug 9, 2025
9b1a85a
Add optional `parallel` feature
jzombie Aug 9, 2025
304e8b4
Update README
jzombie Aug 9, 2025
ad81d99
Fix Clippy warnings
jzombie Aug 9, 2025
a231070
Add additional tests
jzombie Aug 9, 2025
cab83fa
Fix Clippy warnings
jzombie Aug 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions .github/workflows/rust-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,25 @@ on:

jobs:
test:
name: Run Rust Tests (OS = ${{ matrix.os }})
name: Test (OS=${{ matrix.os }}, Features=${{ matrix.name }})
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
include:
- name: "Default"
flags: ""
- name: "No Default Features"
flags: "--no-default-features"
- name: "Parallel"
flags: "--features parallel"
- name: "Expose Internal API"
flags: "--features expose-internal-api"
- name: "Parallel + Expose API"
flags: "--features=parallel,expose-internal-api"
- name: "All Features"
flags: "--all-features"

steps:
- name: Checkout repository
Expand All @@ -22,8 +36,26 @@ jobs:
- name: Install Rust
uses: dtolnay/rust-toolchain@stable

- name: Build workspace
run: cargo build --workspace
# Added caching step to speed up dependency builds.
- name: Cache Cargo dependencies
uses: actions/cache@v4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}-${{ matrix.flags }}
restore-keys: |
${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}-

- name: Run tests
run: cargo test --workspace --all-targets --verbose
- name: Build
run: cargo build --workspace --all-targets ${{ matrix.flags }}

- name: Test
run: cargo test --workspace --all-targets --verbose ${{ matrix.flags }}

# Added step to ensure benchmarks compile. `--no-run` compiles the benchmarks without executing them, keeping CI fast.
- name: Check benchmarks compile
run: cargo bench --workspace --no-run ${{ matrix.flags }}
39 changes: 34 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 25 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[workspace.package]
authors = ["Jeremy Harris <[email protected]>"]
version = "0.11.0-alpha"
version = "0.12.0-alpha"
edition = "2024"
repository = "https://github.com/jzombie/rust-simd-r-drive"
license = "Apache-2.0"
categories = ["database-implementations", "data-structures", "filesystem"]
keywords = ["storage-engine", "binary-storage", "append-only", "simd", "mmap"]
publish = true
publish = false

[package]
name = "simd-r-drive"
Expand All @@ -22,9 +22,27 @@ publish.workspace = true # Inherit from workspace

[workspace.dependencies]
# Intra-workspace crates
simd-r-drive = { path = ".", version = "0.11.0-alpha" }
simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.11.0-alpha" }
simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.11.0-alpha" }
simd-r-drive = { path = ".", version = "0.12.0-alpha" }
simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.12.0-alpha" }
simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.12.0-alpha" }
muxio-tokio-rpc-client = "0.9.0-alpha"
muxio-tokio-rpc-server = "0.9.0-alpha"
muxio-rpc-service = "0.9.0-alpha"
muxio-rpc-service-caller = "0.9.0-alpha"

# Third-party crates (note: not all dependencies are used in the base drive)
async-trait = "0.1.88"
bincode = "1.3.3" # TODO: Replace with `bitcode`
bitcode = "0.6.6"
clap = "4.5.40"
doc-comment = "0.3.3"
indoc = "2.0.6"
serde = "1.0.219"
tokio = "1.45.1" # Tokio is not used in the base `SIMD R Drive`, only in extensions
tempfile = "3.19.0"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
walkdir = "2"

[dependencies]
async-trait = "0.1.88"
Expand All @@ -34,6 +52,7 @@ memmap2 = "0.9.5"
dashmap = "6.1.0"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
rayon = { version = "1.10.0", optional = true }

[dependencies.clap]
version = "4.5.32"
Expand Down Expand Up @@ -66,6 +85,7 @@ harness = false
[features]
default = []
expose-internal-api = []
parallel = ["rayon"]

[workspace]
members = [
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Can be used as a command line interface (CLI) app, or as a library in another ap
- [Multiple Read Modes](#multiple-read-modes)
- [Direct memory access](#direct-memory-access)
- [Streaming](#streaming)
- [Parallel Iteration (via Rayon)](#parallel-iteration-via-rayon)
- [SIMD Write & Query Acceleration](#simd-write--query-acceleration)
- [Python Bindings and Experiments](#python-bindings-and-experiments)
- [License](#license)
Expand Down Expand Up @@ -207,6 +208,11 @@ This avoids high memory overhead while still leveraging `mmap` for efficient acc

> ⚠️ Streaming reads are non-zero-copy since they are read through a buffer.

### Parallel Iteration (via Rayon)

For high-throughput, bulk processing on multi-core machines, `SIMD R Drive` offers an optional parallel iterator. When the `parallel` feature is enabled, you can use the Rayon-powered `.par_iter_entries()` method to process all valid entries in the data store across multiple threads.

This is ideal for data analytics, batch processing, or building in-memory caches where you need to scan the entire dataset as quickly as possible.

## SIMD Write & Query Acceleration

Expand Down
2 changes: 1 addition & 1 deletion experiments/bindings/python_(old_client)/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "simd-r-drive-py"
version = "0.11.0-alpha"
version = "0.12.0-alpha"
description = "SIMD-optimized append-only schema-less storage engine. Key-based binary storage in a single-file storage container."
repository = "https://github.com/jzombie/rust-simd-r-drive"
license = "Apache-2.0"
Expand Down
4 changes: 2 additions & 2 deletions experiments/simd-r-drive-muxio-service-definition/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ keywords.workspace = true # Inherit from workspace
publish.workspace = true # Inherit from workspace

[dependencies]
bitcode = "0.6.6"
muxio-rpc-service = "0.9.0-alpha"
bitcode = { workspace = true }
muxio-rpc-service = { workspace = true }

12 changes: 6 additions & 6 deletions experiments/simd-r-drive-ws-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ publish.workspace = true # Inherit from workspace
[dependencies]
simd-r-drive = { workspace = true }
simd-r-drive-muxio-service-definition = { workspace = true }
muxio-tokio-rpc-client = "0.9.0-alpha"
muxio-rpc-service = "0.9.0-alpha"
muxio-rpc-service-caller = "0.9.0-alpha"
tokio = "1.45.1"
tracing = "0.1.41"
async-trait = "0.1.88"
muxio-tokio-rpc-client = { workspace = true }
muxio-rpc-service = { workspace = true }
muxio-rpc-service-caller = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
async-trait = { workspace = true }
47 changes: 47 additions & 0 deletions experiments/simd-r-drive-ws-client/src/ws_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ impl AsyncDataStoreWriter for WsClient {
unimplemented!("`write_stream` is not currently implemented");
}

async fn write_stream_with_key_hash<R: std::io::Read>(
&self,
_key_hash: u64,
_reader: &mut R,
) -> Result<u64> {
unimplemented!("`write_stream_with_key_hash` is not currently implemented");
}

async fn write(&self, key: &[u8], payload: &[u8]) -> Result<u64> {
let response_params = Write::call(
&self.rpc_client,
Expand All @@ -52,6 +60,10 @@ impl AsyncDataStoreWriter for WsClient {
Ok(response_params.tail_offset)
}

async fn write_with_key_hash(&self, _key_hash: u64, _payload: &[u8]) -> Result<u64> {
unimplemented!("`write_with_key_hash` is not currently implemented");
}

async fn batch_write(&self, entries: &[(&[u8], &[u8])]) -> Result<u64> {
let response_params = BatchWrite::call(
&self.rpc_client,
Expand All @@ -67,6 +79,14 @@ impl AsyncDataStoreWriter for WsClient {
Ok(response_params.tail_offset)
}

async fn batch_write_with_key_hashes(
&self,
_prehashed_keys: Vec<(u64, &[u8])>,
_allow_null_bytes: bool,
) -> Result<u64> {
unimplemented!("`batch_write_with_key_hashes` is not currently implemented");
}

async fn rename(&self, _old_key: &[u8], _new_key: &[u8]) -> Result<u64> {
unimplemented!("`rename` is not currently implemented");
}
Expand All @@ -85,6 +105,14 @@ impl AsyncDataStoreWriter for WsClient {

Ok(resp.tail_offset)
}

async fn batch_delete(&self, _keys: &[&[u8]]) -> Result<u64> {
unimplemented!("`batch_delete` is not currently implemented");
}

async fn batch_delete_key_hashes(&self, _prehashed_keys: &[u64]) -> Result<u64> {
unimplemented!("`batch_delete_key_hashes` is not currently implemented");
}
}

#[async_trait::async_trait]
Expand All @@ -99,13 +127,24 @@ impl AsyncDataStoreReader for WsClient {
Ok(response_params.exists)
}

async fn exists_with_key_hash(&self, _prehashed_key: u64) -> Result<bool> {
unimplemented!("`exists_with_key_hash` is not currently implemented");
}

async fn read(&self, key: &[u8]) -> Result<Option<Self::EntryHandleType>> {
let response_params =
Read::call(&self.rpc_client, ReadRequestParams { key: key.to_vec() }).await?;

Ok(response_params.entry_payload)
}

async fn read_with_key_hash(
&self,
_prehashed_key: u64,
) -> Result<Option<Self::EntryHandleType>> {
unimplemented!("`read_with_key_hash` is not currently implemented");
}

async fn read_last_entry(&self) -> Result<Option<Self::EntryHandleType>> {
unimplemented!("`read_last_entry` is not currently implemented");
}
Expand All @@ -122,6 +161,14 @@ impl AsyncDataStoreReader for WsClient {
Ok(batch_read_result.entries_payloads)
}

async fn batch_read_hashed_keys(
&self,
_prehashed_keys: &[u64],
_non_hashed_keys: Option<&[&[u8]]>,
) -> Result<Vec<Option<Self::EntryHandleType>>> {
unimplemented!("`batch_read_hashed_keys` is not currently implemented");
}

async fn read_metadata(&self, _key: &[u8]) -> Result<Option<EntryMetadata>> {
unimplemented!("`read_metadata` is not currently implemented");
}
Expand Down
Loading
Loading