Skip to content

Commit 0da355e

Browse files
committed
fix: seperate edge case for appendable segment
1 parent 64fc8c0 commit 0da355e

3 files changed

Lines changed: 84 additions & 41 deletions

File tree

lib/segment/src/index/struct_payload_index/mod.rs

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -340,28 +340,6 @@ impl StructPayloadIndex {
340340
Ok(())
341341
}
342342

343-
/// Re-persist the schema for an already-existing field without
344-
/// rebuilding its index. The on-disk index files are untouched; only
345-
/// `config.indices[field]` is updated to reflect the new schema and
346-
/// (re-derived) per-index types.
347-
fn update_indexed_schema(
348-
&mut self,
349-
field: &PayloadKeyType,
350-
new_schema: PayloadFieldSchema,
351-
) -> OperationResult<()> {
352-
let index_types: Vec<_> = self
353-
.field_indexes
354-
.get(field)
355-
.map(|indexes| indexes.iter().map(|i| i.get_full_index_type()).collect())
356-
.unwrap_or_default();
357-
self.config.indices.insert(
358-
field.clone(),
359-
PayloadFieldSchemaWithIndexType::new(new_schema, index_types),
360-
);
361-
self.save_config()?;
362-
Ok(())
363-
}
364-
365343
/// Run a closure with a borrowed read-only view of this index.
366344
///
367345
/// The view borrows `id_tracker` (and the payload `Arc`) for the

lib/segment/src/index/struct_payload_index/payload_index.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -118,22 +118,22 @@ impl PayloadIndex for StructPayloadIndex {
118118
return Ok(false);
119119
};
120120

121-
let transition = classify(&current_schema.schema, new_payload_schema);
122-
match transition {
121+
match classify(&current_schema.schema, new_payload_schema) {
123122
SchemaTransition::Identical => Ok(false),
124-
// Only `on_disk` flipped: keep the existing index files instead of
125-
// dropping and rebuilding. Non-appendable reloads in the new mode
126-
// during `build_index`. Appendable Gridstore ignores `on_disk` at
127-
// the storage layer and is unsafe to reload while mutable, so just
128-
// persist the new schema and leave the live index untouched (the
129-
// optimizer applies `on_disk` when converting to non-appendable).
130-
SchemaTransition::OnlyOnDiskFlipped { .. } => {
131-
if matches!(self.storage_type, StorageType::GridstoreAppendable) {
132-
self.update_indexed_schema(field, new_payload_schema.clone())?;
133-
}
123+
// Only `on_disk` flipped on a non-appendable (mmap) segment: keep the
124+
// existing files and reload the index in the new mode during
125+
// `build_index` instead of dropping and rebuilding from payload.
126+
SchemaTransition::OnlyOnDiskFlipped { .. }
127+
if matches!(self.storage_type, StorageType::GridstoreNonAppendable) =>
128+
{
134129
Ok(false)
135130
}
136-
SchemaTransition::Incompatible => self.drop_index(field),
131+
// Appendable Gridstore (its `on_disk` change goes through the normal
132+
// drop-and-rebuild path) or any incompatible change: drop so
133+
// `build_index` rebuilds.
134+
SchemaTransition::OnlyOnDiskFlipped { .. } | SchemaTransition::Incompatible => {
135+
self.drop_index(field)
136+
}
137137
}
138138
}
139139

lib/segment/src/index/struct_payload_index/tests.rs

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,19 +93,49 @@ fn test_load_payload_index() {
9393
}
9494

9595
#[test]
96-
fn drop_index_if_incompatible_keeps_index_on_on_disk_only_change() {
96+
fn drop_index_if_incompatible_keeps_non_appendable_index_on_on_disk_only_change() {
97+
use std::collections::HashMap;
98+
use std::sync::Arc;
99+
100+
use atomic_refcell::AtomicRefCell;
101+
102+
use super::StructPayloadIndex;
97103
use crate::data_types::index::IntegerIndexParams;
98-
use crate::fixtures::payload_context_fixture::create_struct_payload_index;
104+
use crate::fixtures::payload_context_fixture::{
105+
create_id_tracker_fixture, create_payload_storage_fixture,
106+
};
99107
use crate::fixtures::payload_fixtures::INT_KEY;
100108
use crate::index::PayloadIndex;
101109
use crate::types::PayloadSchemaParams;
102110

103111
let dir = Builder::new().prefix("payload_dir").tempdir().unwrap();
104-
let mut index = create_struct_payload_index(dir.path(), 100, 42);
112+
let payload_storage = Arc::new(AtomicRefCell::new(
113+
create_payload_storage_fixture(100, 42).into(),
114+
));
115+
let id_tracker = Arc::new(AtomicRefCell::new(create_id_tracker_fixture(100)));
116+
117+
// Non-appendable: `on_disk` is meaningful, so an on_disk-only change is
118+
// reused (reloaded in `build_index`), not dropped.
119+
let mut index = StructPayloadIndex::open(
120+
payload_storage,
121+
id_tracker,
122+
HashMap::new(),
123+
dir.path(),
124+
false,
125+
true,
126+
)
127+
.unwrap();
128+
105129
let field = JsonPath::from_str(INT_KEY).unwrap();
130+
index
131+
.set_indexed(
132+
&field,
133+
PayloadSchemaType::Integer,
134+
&HardwareCounterCell::new(),
135+
)
136+
.unwrap();
106137

107-
// The fixture indexed INT_KEY as `FieldType(Integer)` (on_disk = false).
108-
// Flip only `on_disk` to true; every other param stays at its default.
138+
// Same integer index, but with on_disk flipped to true.
109139
let on_disk_schema =
110140
PayloadFieldSchema::FieldParams(PayloadSchemaParams::Integer(IntegerIndexParams {
111141
on_disk: Some(true),
@@ -118,14 +148,49 @@ fn drop_index_if_incompatible_keeps_index_on_on_disk_only_change() {
118148

119149
assert!(
120150
!dropped,
121-
"an on_disk-only change must be treated as compatible, not dropped",
151+
"a non-appendable on_disk-only change must be reused, not dropped",
122152
);
123153
assert!(
124154
index.field_indexes.contains_key(&field),
125155
"the index must remain present after an on_disk-only change",
126156
);
127157
}
128158

159+
#[test]
160+
fn drop_index_if_incompatible_drops_appendable_index_on_on_disk_only_change() {
161+
use crate::data_types::index::IntegerIndexParams;
162+
use crate::fixtures::payload_context_fixture::create_struct_payload_index;
163+
use crate::fixtures::payload_fixtures::INT_KEY;
164+
use crate::index::PayloadIndex;
165+
use crate::types::PayloadSchemaParams;
166+
167+
let dir = Builder::new().prefix("payload_dir").tempdir().unwrap();
168+
// Appendable Gridstore: `on_disk` has no storage-layer effect, so an
169+
// on_disk-only change falls back to the drop-and-rebuild path (optimizing
170+
// the appendable case is handled separately).
171+
let mut index = create_struct_payload_index(dir.path(), 100, 42);
172+
let field = JsonPath::from_str(INT_KEY).unwrap();
173+
174+
let on_disk_schema =
175+
PayloadFieldSchema::FieldParams(PayloadSchemaParams::Integer(IntegerIndexParams {
176+
on_disk: Some(true),
177+
..Default::default()
178+
}));
179+
180+
let dropped = index
181+
.drop_index_if_incompatible(&field, &on_disk_schema)
182+
.unwrap();
183+
184+
assert!(
185+
dropped,
186+
"an appendable on_disk-only change drops (rebuild path) on this branch",
187+
);
188+
assert!(
189+
!index.field_indexes.contains_key(&field),
190+
"the appendable index is removed by the drop",
191+
);
192+
}
193+
129194
#[test]
130195
fn build_index_reloads_in_new_mode_on_on_disk_change() {
131196
use std::collections::HashMap;

0 commit comments

Comments
 (0)