Skip to content

Commit 916efb6

Browse files
authored
[9.3] Fix dense_vector default index options when using BFLOAT16 (#145202) (#145356)
1 parent 783ecee commit 916efb6

7 files changed

Lines changed: 114 additions & 12 deletions

File tree

docs/changelog/145202.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
area: Vector Search
2+
issues:
3+
- 145204
4+
pr: 145202
5+
summary: Fix `dense_vector` default index options when using BFLOAT16
6+
type: bug

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ private static Version parseUnchecked(String version) {
211211
public static final IndexVersion TEXT_FIELDS_STORED_IN_IGNORED_SOURCE_FIX = def(9_058_0_00, Version.LUCENE_10_3_2);
212212
public static final IndexVersion DEFAULT_HNSW_EARLY_TERMINATION = def(9_059_0_00, Version.LUCENE_10_3_2);
213213
public static final IndexVersion PATTERN_TEXT_ARGS_IN_BINARY_DOC_VALUES = def(9_060_0_00, Version.LUCENE_10_3_2);
214+
public static final IndexVersion DENSE_VECTOR_BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT = def(9_060_0_01, Version.LUCENE_10_3_2);
214215

215216
/*
216217
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ private static boolean defaultOversampleForBBQ(IndexVersion version) {
218218
public static final IndexVersion DEFAULT_TO_INT8 = IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW;
219219
public static final IndexVersion DEFAULT_TO_BBQ = IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW;
220220
public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0;
221+
public static final IndexVersion BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT =
222+
IndexVersions.DENSE_VECTOR_BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT;
221223

222224
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
223225
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
@@ -384,7 +386,7 @@ public Builder(
384386
}
385387

386388
private DenseVectorIndexOptions defaultIndexOptions(boolean defaultInt8Hnsw, boolean defaultBBQHnsw) {
387-
if (elementType.getValue() != ElementType.FLOAT || indexed.getValue() == false) {
389+
if (elementTypesWithDefaultIndexOptions().contains(elementType.getValue()) == false || indexed.getValue() == false) {
388390
return null;
389391
}
390392

@@ -415,6 +417,14 @@ private DenseVectorIndexOptions defaultIndexOptions(boolean defaultInt8Hnsw, boo
415417
return null;
416418
}
417419

420+
private Set<ElementType> elementTypesWithDefaultIndexOptions() {
421+
if (indexVersionCreated.onOrAfter(BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT)) {
422+
return Set.of(ElementType.FLOAT, ElementType.BFLOAT16);
423+
} else {
424+
return Set.of(ElementType.FLOAT);
425+
}
426+
}
427+
418428
@Override
419429
protected Parameter<?>[] getParameters() {
420430
return new Parameter<?>[] { elementType, dims, indexed, similarity, indexOptions, meta };

server/src/main/java/org/elasticsearch/inference/SimilarityMeasure.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,21 @@
1010
package org.elasticsearch.inference;
1111

1212
import org.elasticsearch.TransportVersion;
13+
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
1314

1415
import java.util.EnumSet;
1516
import java.util.Locale;
1617

1718
public enum SimilarityMeasure {
18-
COSINE,
19-
DOT_PRODUCT,
20-
L2_NORM;
19+
COSINE(DenseVectorFieldMapper.VectorSimilarity.COSINE),
20+
DOT_PRODUCT(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT),
21+
L2_NORM(DenseVectorFieldMapper.VectorSimilarity.L2_NORM),;
22+
23+
private final DenseVectorFieldMapper.VectorSimilarity vectorSimilarity;
24+
25+
SimilarityMeasure(DenseVectorFieldMapper.VectorSimilarity vectorSimilarity) {
26+
this.vectorSimilarity = vectorSimilarity;
27+
}
2128

2229
private static final EnumSet<SimilarityMeasure> BEFORE_L2_NORM_ENUMS = EnumSet.range(COSINE, DOT_PRODUCT);
2330

@@ -26,6 +33,10 @@ public String toString() {
2633
return name().toLowerCase(Locale.ROOT);
2734
}
2835

36+
public DenseVectorFieldMapper.VectorSimilarity vectorSimilarity() {
37+
return vectorSimilarity;
38+
}
39+
2940
public static SimilarityMeasure fromString(String name) {
3041
return valueOf(name.trim().toUpperCase(Locale.ROOT));
3142
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTestUtils.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,16 @@
1212
import com.carrotsearch.randomizedtesting.RandomizedContext;
1313
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
1414

15+
import org.elasticsearch.index.IndexVersion;
1516
import org.elasticsearch.inference.SimilarityMeasure;
1617

18+
import java.util.Collections;
19+
import java.util.HashSet;
1720
import java.util.List;
1821
import java.util.Random;
22+
import java.util.Set;
23+
24+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT;
1925

2026
public class DenseVectorFieldMapperTestUtils {
2127
private DenseVectorFieldMapperTestUtils() {}
@@ -56,6 +62,16 @@ public static int randomCompatibleDimensions(DenseVectorFieldMapper.ElementType
5662
};
5763
}
5864

65+
public static Set<DenseVectorFieldMapper.ElementType> elementTypesWithDefaultIndexOptions(IndexVersion indexVersion) {
66+
Set<DenseVectorFieldMapper.ElementType> elementTypes = new HashSet<>();
67+
elementTypes.add(DenseVectorFieldMapper.ElementType.FLOAT);
68+
if (indexVersion.onOrAfter(BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT)) {
69+
elementTypes.add(DenseVectorFieldMapper.ElementType.BFLOAT16);
70+
}
71+
72+
return Collections.unmodifiableSet(elementTypes);
73+
}
74+
5975
private static Random random() {
6076
return RandomizedContext.current().getRandom();
6177
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType;
5353
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorSimilarity;
5454
import org.elasticsearch.index.query.SearchExecutionContext;
55+
import org.elasticsearch.inference.SimilarityMeasure;
5556
import org.elasticsearch.search.lookup.Source;
5657
import org.elasticsearch.search.lookup.SourceProvider;
5758
import org.elasticsearch.search.vectors.VectorData;
@@ -1337,6 +1338,70 @@ public void testDefaultParamsIndexByDefault() throws Exception {
13371338
assertEquals(VectorSimilarity.COSINE, denseVectorFieldType.getSimilarity());
13381339
}
13391340

1341+
public void testDefaultIndexOptions() throws IOException {
1342+
for (int i = 0; i < 100; i++) {
1343+
// Pick a random index version from one of four eras that each produce different default index options
1344+
int era = randomIntBetween(0, 3);
1345+
IndexVersion indexVersion = switch (era) {
1346+
case 0 -> IndexVersionUtils.randomVersionBetween(
1347+
random(),
1348+
IndexVersionUtils.getLowestReadCompatibleVersion(),
1349+
IndexVersionUtils.getPreviousVersion(DenseVectorFieldMapper.DEFAULT_TO_INT8)
1350+
);
1351+
case 1 -> IndexVersionUtils.randomVersionBetween(
1352+
random(),
1353+
DenseVectorFieldMapper.DEFAULT_TO_INT8,
1354+
IndexVersionUtils.getPreviousVersion(DenseVectorFieldMapper.DEFAULT_TO_BBQ)
1355+
);
1356+
case 2 -> IndexVersionUtils.randomVersionBetween(
1357+
random(),
1358+
DenseVectorFieldMapper.DEFAULT_TO_BBQ,
1359+
IndexVersionUtils.getPreviousVersion(DenseVectorFieldMapper.BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT)
1360+
);
1361+
case 3 -> IndexVersionUtils.randomVersionBetween(
1362+
random(),
1363+
DenseVectorFieldMapper.BFLOAT16_DEFAULT_INDEX_OPTIONS_BACKPORT,
1364+
IndexVersion.current()
1365+
);
1366+
default -> throw new AssertionError("Unexpected value: " + era);
1367+
};
1368+
1369+
boolean defaultInt8Hnsw = indexVersion.onOrAfter(DenseVectorFieldMapper.DEFAULT_TO_INT8);
1370+
boolean defaultBBQHnsw = indexVersion.onOrAfter(DenseVectorFieldMapper.DEFAULT_TO_BBQ);
1371+
1372+
final ElementType elementType = randomFrom(ElementType.values());
1373+
final int dims = DenseVectorFieldMapperTestUtils.randomCompatibleDimensions(elementType, 512);
1374+
final VectorSimilarity similarity = randomFrom(
1375+
DenseVectorFieldMapperTestUtils.getSupportedSimilarities(elementType)
1376+
.stream()
1377+
.map(SimilarityMeasure::vectorSimilarity)
1378+
.toList()
1379+
);
1380+
1381+
MapperService mapperService = createMapperService(indexVersion, fieldMapping(b -> {
1382+
b.field("type", "dense_vector");
1383+
b.field("index", true);
1384+
b.field("element_type", elementType.toString());
1385+
b.field("dims", dims);
1386+
b.field("similarity", similarity.toString());
1387+
}));
1388+
1389+
DenseVectorFieldMapper mapper = (DenseVectorFieldMapper) mapperService.mappingLookup().getMapper("field");
1390+
DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions = mapper.fieldType().getIndexOptions();
1391+
1392+
if (DenseVectorFieldMapperTestUtils.elementTypesWithDefaultIndexOptions(indexVersion).contains(elementType) == false) {
1393+
assertNull(indexOptions);
1394+
} else if (defaultBBQHnsw && dims >= DenseVectorFieldMapper.BBQ_DIMS_DEFAULT_THRESHOLD) {
1395+
assertThat(indexOptions, instanceOf(DenseVectorFieldMapper.BBQHnswIndexOptions.class));
1396+
} else if (defaultInt8Hnsw) {
1397+
// INT8 era, or BBQ era with dims below the BBQ threshold
1398+
assertThat(indexOptions, instanceOf(DenseVectorFieldMapper.Int8HnswIndexOptions.class));
1399+
} else {
1400+
assertNull(indexOptions);
1401+
}
1402+
}
1403+
}
1404+
13401405
public void testValidateOnBuild() {
13411406
final MapperBuilderContext context = MapperBuilderContext.root(false, false);
13421407

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,14 +1413,7 @@ private static void configureDenseVectorMapperBuilder(
14131413
if (indexVersionCreated.onOrAfter(NEW_SPARSE_VECTOR)) {
14141414
SimilarityMeasure similarity = modelSettings.similarity();
14151415
if (similarity != null) {
1416-
switch (similarity) {
1417-
case COSINE -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.COSINE);
1418-
case DOT_PRODUCT -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.DOT_PRODUCT);
1419-
case L2_NORM -> denseVectorMapperBuilder.similarity(DenseVectorFieldMapper.VectorSimilarity.L2_NORM);
1420-
default -> throw new IllegalArgumentException(
1421-
"Unknown similarity measure in model_settings [" + similarity.name() + "]"
1422-
);
1423-
}
1416+
denseVectorMapperBuilder.similarity(similarity.vectorSimilarity());
14241417
}
14251418
}
14261419

0 commit comments

Comments
 (0)