Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 22 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ get_filename_component(binroot ${CMAKE_CURRENT_BINARY_DIR}/.. ABSOLUTE)

# Platform detection
if(APPLE)
set(OS "macos")
set(OS "macos")
elseif(UNIX)
set(OS "linux")
set(OS "linux")
endif()
message(STATUS "OS detected: ${OS}")

Expand Down Expand Up @@ -41,28 +41,28 @@ endfunction()

# Define shared object setup function
function(setup_shared_object_target target)
if(APPLE)
set_target_properties(${target} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
# Force .so extension on macOS instead of .dylib
set_target_properties(${target} PROPERTIES SUFFIX ".so")
else()
# We are building a shared library and want to verify that any reference to a symbol within the library will resolve to
# the library's own definition, rather than to a definition in another shared library or the main executable.
set_target_properties(${target} PROPERTIES LINK_FLAGS "-pthread -shared -Bsymbolic -Bsymbolic-functions")
endif()
set_target_properties(${target} PROPERTIES PREFIX "")
if(APPLE)
set_target_properties(${target} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
# Force .so extension on macOS instead of .dylib
set_target_properties(${target} PROPERTIES SUFFIX ".so")
else()
# We are building a shared library and want to verify that any reference to a symbol within the library will resolve to
# the library's own definition, rather than to a definition in another shared library or the main executable.
set_target_properties(${target} PROPERTIES LINK_FLAGS "-pthread -shared -Bsymbolic -Bsymbolic-functions")
endif()
set_target_properties(${target} PROPERTIES PREFIX "")
endfunction()

# Define debug symbols extraction function
function(extract_debug_symbols target)
if(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" AND NOT APPLE)
add_custom_command(TARGET ${target} POST_BUILD
if(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" AND NOT APPLE)
add_custom_command(TARGET ${target} POST_BUILD
COMMAND cp $<TARGET_FILE:${target}> $<TARGET_FILE:${target}>.debug
COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${target}>.debug $<TARGET_FILE:${target}>
COMMAND strip -g $<TARGET_FILE:${target}>
COMMENT "Extracting debug symbols from ${target}"
)
endif()
endif()
endfunction()

#----------------------------------------------------------------------------------------------
Expand All @@ -85,12 +85,12 @@ if(NOT DEFINED COORD_TYPE)
endif()

if(COORD_TYPE STREQUAL "oss")
set(BINDIR "${binroot}/search-community")
set(BINDIR "${binroot}/search-community")
elseif(COORD_TYPE STREQUAL "rlec")
set(BINDIR "${binroot}/search-enterprise")
add_compile_definitions(PRIVATE RS_CLUSTER_ENTERPRISE)
set(BINDIR "${binroot}/search-enterprise")
add_compile_definitions(PRIVATE RS_CLUSTER_ENTERPRISE)
else()
message(FATAL_ERROR "Invalid COORD_TYPE (='${COORD_TYPE}'). Should be either 'oss' or 'rlec'")
message(FATAL_ERROR "Invalid COORD_TYPE (='${COORD_TYPE}'). Should be either 'oss' or 'rlec'")
endif()

#----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -232,6 +232,7 @@ option(VECSIM_BUILD_TESTS "Build vecsim tests" OFF)

add_subdirectory(deps/VectorSimilarity)
add_subdirectory(src/geometry)
add_subdirectory(src/util/arr)
add_subdirectory(src/util/hash)
add_subdirectory(src/coord)

Expand Down Expand Up @@ -268,6 +269,7 @@ add_library(rscore OBJECT ${SOURCES})

set(FINAL_OBJECTS
$<TARGET_OBJECTS:trie>
$<TARGET_OBJECTS:arr>
$<TARGET_OBJECTS:rscore>
$<TARGET_OBJECTS:rmutil>
$<TARGET_OBJECTS:friso>
Expand Down Expand Up @@ -296,6 +298,7 @@ target_link_libraries(redisearch
redisearch-hash
VectorSimilarity
redisearch-coord
arr
trie
uv_a
${BINDIR}/redisearch_rs/libredisearch_rs.a
Expand Down
4 changes: 4 additions & 0 deletions src/redisearch_rs/trie_bencher/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ bench = false
name = "operations"
harness = false

[[bench]]
name = "iter"
harness = false

[dependencies]
crc32fast.workspace = true
criterion.workspace = true
Expand Down
37 changes: 37 additions & 0 deletions src/redisearch_rs/trie_bencher/benches/iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

//! Benchmark the iterators exposed by a trie map.

use criterion::{Criterion, criterion_group, criterion_main};
use trie_bencher::OperationBencher;

use trie_bencher::corpus::CorpusType;

fn iter_benches_wiki1k(c: &mut Criterion) {
let corpus = CorpusType::RedisBench1kWiki;
let terms = corpus.create_terms(true);

let bencher = OperationBencher::new("Wiki-1K".to_owned(), terms, None);
// Matches "A" and "Abacus"
bencher.find_prefixes_group(c, "Abacuses", "Find prefixes");
}

fn iter_benches_gutenberg(c: &mut Criterion) {
let corpus = CorpusType::GutenbergEbook(true);
let terms = corpus.create_terms(true);

let bencher = OperationBencher::new("Gutenberg".to_owned(), terms, None);
// Matches "ever", "everlasting" and "everlastingly".
bencher.find_prefixes_group(c, "everlastingly", "Find prefixes");
}

criterion_group!(wiki_1k_iter, iter_benches_wiki1k);
criterion_group!(gutenberg_iter, iter_benches_gutenberg);
criterion_main!(wiki_1k_iter, gutenberg_iter);
50 changes: 33 additions & 17 deletions src/redisearch_rs/trie_bencher/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,7 @@ fn main() {

// Construct the correct folder path based on OS and architecture
let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap();
let lib_dir = {
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
let target_arch = match target_arch.as_str() {
"x86_64" => "x64",
_ => &target_arch,
};

root.join(format!(
"bin/{target_os}-{target_arch}-release/search-community/deps/triemap"
))
};

assert!(std::fs::exists(lib_dir.join("libtrie.a")).unwrap());
// There are several symbols exposed by `libtrie.a` that we don't
// actually invoke (either directly or indirectly) in our benchmarks.
// We provide a definition for the ones we need (e.g. Redis' allocation functions),
Expand All @@ -39,12 +27,32 @@ fn main() {
if target_os != "macos" {
println!("cargo:rustc-link-arg=-Wl,--unresolved-symbols=ignore-in-object-files");
}

let bin_root = {
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
let target_arch = match target_arch.as_str() {
"x86_64" => "x64",
_ => &target_arch,
};

root.join(format!(
"bin/{target_os}-{target_arch}-release/search-community/"
))
};

let libtrie_dir = bin_root.join("deps/triemap");
let libtrie = libtrie_dir.join("libtrie.a");
assert!(std::fs::exists(&libtrie).unwrap());
println!("cargo:rustc-link-lib=static=trie");
println!("cargo:rustc-link-search=native={}", lib_dir.display());
println!(
"cargo:rerun-if-changed={}",
lib_dir.join("libtrie.a").display()
);
println!("cargo:rerun-if-changed={}", libtrie.display());
println!("cargo:rustc-link-search=native={}", libtrie_dir.display());

let libarr_dir = bin_root.join("src/util/arr");
let libarr = libarr_dir.join("libarr.a");
assert!(std::fs::exists(&libarr).unwrap());
println!("cargo:rustc-link-lib=static=arr");
println!("cargo:rerun-if-changed={}", libarr.display());
println!("cargo:rustc-link-search=native={}", libarr_dir.display());

let redis_modules = root.join("deps").join("RedisModulesSDK");
let src = root.join("src");
Expand All @@ -57,6 +65,14 @@ fn main() {
.to_str()
.unwrap(),
)
.header(
root.join("src")
.join("util")
.join("arr")
.join("arr.h")
.to_str()
.unwrap(),
)
.clang_arg(format!("-I{}", src.display()))
.clang_arg(format!("-I{}", deps.display()))
.clang_arg(format!("-I{}", redis_modules.display()))
Expand Down
32 changes: 32 additions & 0 deletions src/redisearch_rs/trie_bencher/src/bencher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,38 @@ impl OperationBencher {
load_c_benchmark(&mut group, &self.keys);
group.finish();
}

/// Benchmark the find prefixes iterator.
///
/// The benchmark group will be marked with the given label.
pub fn find_prefixes_group(&self, c: &mut Criterion, target: &str, label: &str) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a comment: Not sure what is the strategy to do the benchmarking. I wonder if we should be using more than one key as a query to the operations to benchmark.

Ignore my comment if this has been the case for other operations.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We generally try to exercise all the different "modes" of the operation.
If you look at the operations.rs file, you can see how we classify inputs based on what part of the algorithm they "hit".

For prefixes, we could add inputs that exit earlier as an additional data point.
Alternatively, we can add a dedicated corpus with a higher opportunity for matches, but it may be overkill.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added another bench in 58a4161 using the full Gutenberg dataset, which is ~10x the size.
We can see that the timing scales (roughly) with the depth of the traversal, which is what we expect.

let mut group = self.benchmark_group_immutable(c, label);
find_prefixes_rust_benchmark(&mut group, &self.rust_map, target);
find_prefixes_c_benchmark(&mut group, &self.keys, target);
group.finish();
}
}

fn find_prefixes_rust_benchmark<M: Measurement>(
c: &mut BenchmarkGroup<'_, M>,
map: &RustTrieMap,
target: &str,
) {
let target = target.as_bytes();
c.bench_function("Rust", |b| {
b.iter(|| map.prefixes_iter(black_box(target)).collect::<Vec<_>>())
});
}

fn find_prefixes_c_benchmark<M: Measurement>(
c: &mut BenchmarkGroup<'_, M>,
terms: &[String],
target: &str,
) {
let target = target.into_cstring();
let view = target.as_view();
let map = c_load_from_terms(terms);
c.bench_function("C", |b| b.iter(|| map.find_prefixes(view)));
}

fn find_rust_benchmark<M: Measurement>(
Expand Down
24 changes: 24 additions & 0 deletions src/redisearch_rs/trie_bencher/src/c_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
//! and providing access to the raw pointer and length of the string as needed by the C API.
use std::ffi::{CStr, CString, c_char, c_void};

use crate::ffi::{array_free, array_new_sz};

#[repr(transparent)]
/// A thin wrapper around the C TrieMap implementation to ensure that the map is properly initialized and cleaned up.
pub struct CTrieMap(*mut crate::ffi::TrieMap);
Expand Down Expand Up @@ -44,6 +46,19 @@ impl CTrieMap {
unsafe { crate::ffi::TrieMap_Delete(self.0, term.ptr(), term.len(), Some(do_not_free)) }
}

pub fn find_prefixes(&self, term: TrieTermView) -> PrefixesValues {
let mut results = {
// Here we are emulating the behaviour of the `array_new` macro, which we can't
// invoke directly on the Rust side.
let raw = unsafe { array_new_sz(std::mem::size_of::<*mut c_void>() as u32, 1, 0) };
raw as *mut *mut c_void
};
let _n_results = unsafe {
crate::ffi::TrieMap_FindPrefixes(self.0, term.ptr(), term.len(), &mut results)
};
PrefixesValues(results)
}

pub fn n_nodes(&self) -> usize {
unsafe { (*self.0).size }
}
Expand All @@ -63,6 +78,15 @@ impl Drop for CTrieMap {
}
}

/// The values attached to the prefixes retrieved by [`CTrieMap::find_prefixes`].
pub struct PrefixesValues(*mut *mut c_void);

impl Drop for PrefixesValues {
fn drop(&mut self) {
unsafe { array_free(self.0 as *mut c_void) };
}
}

unsafe extern "C" fn do_nothing(oldval: *mut c_void, _newval: *mut c_void) -> *mut c_void {
// Just return the old value, since it's a null pointer and we don't want
// the C map implementation to try to free it.
Expand Down
9 changes: 6 additions & 3 deletions src/redisearch_rs/trie_bencher/src/corpus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,12 @@ impl CorpusType {
fn get_pretty_print_path(&self) -> PathBuf {
let path = PathBuf::from("data");
let filename = match self {
CorpusType::RedisBench1kWiki => "redis_wiki1k_titles_bench.txt",
CorpusType::RedisBench10kNumerics => "redis_wiki10k_guids_bench.txt",
CorpusType::GutenbergEbook(_) => "gutenberg_bench.txt",
CorpusType::RedisBench1kWiki => "redis_wiki1k_titles_bench.txt".to_owned(),
CorpusType::RedisBench10kNumerics => "redis_wiki10k_guids_bench.txt".to_owned(),
CorpusType::GutenbergEbook(full) => {
let suffix = if *full { "full" } else { "sample" };
format!("gutenberg_bench_{suffix}.txt")
}
};
path.join(filename)
}
Expand Down
11 changes: 4 additions & 7 deletions src/redisearch_rs/trie_bencher/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,9 @@ mod bindings {

// We re-export the required bindings only. This list will be extended as needed.
// It allows us to keep the bindings module private while still exposing the necessary functions and types.
pub(crate) use bindings::NewTrieMap;
pub(crate) use bindings::TrieMap;
pub(crate) use bindings::TrieMap_Add;
pub(crate) use bindings::TrieMap_Delete;
pub(crate) use bindings::TrieMap_ExactMemUsage;
pub(crate) use bindings::TrieMap_Find;
pub(crate) use bindings::TrieMap_Free;
pub(crate) use bindings::{
NewTrieMap, TrieMap, TrieMap_Add, TrieMap_Delete, TrieMap_ExactMemUsage, TrieMap_Find,
TrieMap_FindPrefixes, TrieMap_Free, array_free, array_new_sz,
};
// used in outside binary crate (main.rs)
pub use bindings::TRIEMAP_NOTFOUND;
2 changes: 2 additions & 0 deletions src/redisearch_rs/trie_rs/src/iter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
pub mod filter;
mod iter_;
mod lending;
mod prefixes;
mod values;

pub use iter_::Iter;
pub use lending::LendingIter;
pub use prefixes::PrefixesIter;
pub use values::Values;
Loading
Loading