@@ -55,6 +55,15 @@ namespace faiss {
5555#endif
5656#endif
5757
// Decide whether the NEON code paths in this file may be compiled.
//
// USE_NEON is defined on AArch64 unless the compiler is a genuine GCC older
// than version 8. Clang also defines __GNUC__ (as 4 by default, for GCC
// compatibility), so it must be excluded explicitly; otherwise every Clang
// AArch64 build would be classified as "GCC < 8", emit a misleading warning,
// and lose the NEON implementations.
#if defined(__aarch64__)
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
#warning \
        " Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8"
#else
#define USE_NEON
#endif
#endif
66+
5867namespace {
5968
6069typedef ScalarQuantizer::QuantizerType QuantizerType;
@@ -105,7 +114,7 @@ struct Codec8bit {
105114 }
106115#endif
107116
108- #ifdef __aarch64__
117+ #ifdef USE_NEON
109118 static FAISS_ALWAYS_INLINE float32x4x2_t
110119 decode_8_components (const uint8_t * code, int i) {
111120 float32_t result[8 ] = {};
@@ -175,7 +184,7 @@ struct Codec4bit {
175184 }
176185#endif
177186
178- #ifdef __aarch64__
187+ #ifdef USE_NEON
179188 static FAISS_ALWAYS_INLINE float32x4x2_t
180189 decode_8_components (const uint8_t * code, int i) {
181190 float32_t result[8 ] = {};
@@ -336,7 +345,7 @@ struct Codec6bit {
336345
337346#endif
338347
339- #ifdef __aarch64__
348+ #ifdef USE_NEON
340349 static FAISS_ALWAYS_INLINE float32x4x2_t
341350 decode_8_components (const uint8_t * code, int i) {
342351 float32_t result[8 ] = {};
@@ -437,7 +446,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
437446
438447#endif
439448
440- #ifdef __aarch64__
449+ #ifdef USE_NEON
441450
442451template <class Codec >
443452struct QuantizerTemplate <Codec, QuantizerTemplateScaling::UNIFORM, 8 >
@@ -546,7 +555,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
546555
547556#endif
548557
549- #ifdef __aarch64__
558+ #ifdef USE_NEON
550559
551560template <class Codec >
552561struct QuantizerTemplate <Codec, QuantizerTemplateScaling::NON_UNIFORM, 8 >
@@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {
634643
635644#endif
636645
637- #ifdef __aarch64__
646+ #ifdef USE_NEON
638647
639648template <>
640649struct QuantizerFP16 <8 > : QuantizerFP16<1 > {
@@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> {
714723
715724#endif
716725
717- #ifdef __aarch64__
726+ #ifdef USE_NEON
718727
719728template <>
720729struct QuantizerBF16 <8 > : QuantizerBF16<1 > {
@@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
795804
796805#endif
797806
798- #ifdef __aarch64__
807+ #ifdef USE_NEON
799808
800809template <>
801810struct Quantizer8bitDirect <8 > : Quantizer8bitDirect<1 > {
@@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
884893
885894#endif
886895
887- #ifdef __aarch64__
896+ #ifdef USE_NEON
888897
889898template <>
890899struct Quantizer8bitDirectSigned <8 > : Quantizer8bitDirectSigned<1 > {
@@ -1242,7 +1251,7 @@ struct SimilarityL2<8> {
12421251
12431252#endif
12441253
1245- #ifdef __aarch64__
1254+ #ifdef USE_NEON
12461255template <>
12471256struct SimilarityL2 <8 > {
12481257 static constexpr int simdwidth = 8 ;
@@ -1402,7 +1411,7 @@ struct SimilarityIP<8> {
14021411};
14031412#endif
14041413
1405- #ifdef __aarch64__
1414+ #ifdef USE_NEON
14061415
14071416template <>
14081417struct SimilarityIP <8 > {
@@ -1599,7 +1608,7 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
15991608
16001609#endif
16011610
1602- #ifdef __aarch64__
1611+ #ifdef USE_NEON
16031612
16041613template <class Quantizer , class Similarity >
16051614struct DCTemplate <Quantizer, Similarity, 8 > : SQDistanceComputer {
@@ -1815,7 +1824,7 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
18151824
18161825#endif
18171826
1818- #ifdef __aarch64__
1827+ #ifdef USE_NEON
18191828
18201829template <class Similarity >
18211830struct DistanceComputerByte <Similarity, 8 > : SQDistanceComputer {
@@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
20412050 if (d % 16 == 0 ) {
20422051 return select_quantizer_1<16 >(qtype, d, trained);
20432052 } else
2044- #elif defined(USE_F16C) || defined(__aarch64__ )
2053+ #elif defined(USE_F16C) || defined(USE_NEON )
20452054 if (d % 8 == 0 ) {
20462055 return select_quantizer_1<8 >(qtype, d, trained);
20472056 } else
@@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
20822091 qtype, d, trained);
20832092 }
20842093 } else
2085- #elif defined(USE_F16C) || defined(__aarch64__ )
2094+ #elif defined(USE_F16C) || defined(USE_NEON )
20862095 if (d % 8 == 0 ) {
20872096 if (metric == METRIC_L2) {
20882097 return select_distance_computer<SimilarityL2<8 >>(qtype, d, trained);
@@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
24572466 return sel0_InvertedListScanner<16 >(
24582467 mt, this , quantizer, store_pairs, sel, by_residual);
24592468 } else
2460- #elif defined(USE_F16C) || defined(__aarch64__ )
2469+ #elif defined(USE_F16C) || defined(USE_NEON )
24612470 if (d % 8 == 0 ) {
24622471 return sel0_InvertedListScanner<8 >(
24632472 mt, this , quantizer, store_pairs, sel, by_residual);
0 commit comments