@@ -7,6 +7,7 @@ use common::fixed_length_priority_queue::FixedLengthPriorityQueue;
77use common:: types:: { PointOffsetType , ScoredPointOffset } ;
88use io:: file_operations:: { atomic_save, read_bin} ;
99use itertools:: Itertools ;
10+ use memory:: madvise:: Advice ;
1011use serde:: { Deserialize , Serialize } ;
1112
1213use super :: HnswM ;
@@ -500,72 +501,99 @@ impl GraphLayers {
500501}
501502
502503impl GraphLayers {
503- pub fn load ( dir : & Path , on_disk : bool , compress : bool ) -> OperationResult < Self > {
504+ pub fn load (
505+ dir : & Path ,
506+ on_disk : bool ,
507+ force_format : Option < & GraphLinksFormatParam > ,
508+ ) -> OperationResult < Self > {
504509 let graph_data: GraphLayerData = read_bin ( & GraphLayers :: get_path ( dir) ) ?;
505-
506- if compress {
507- Self :: convert_to_compressed ( dir, HnswM :: new ( graph_data. m , graph_data. m0 ) ) ?;
508- }
509-
510+ let hnsw_m = HnswM :: new ( graph_data. m , graph_data. m0 ) ;
510511 Ok ( Self {
511- hnsw_m : HnswM :: new ( graph_data . m , graph_data . m0 ) ,
512- links : Self :: load_links ( dir, on_disk) ?,
512+ hnsw_m,
513+ links : Self :: load_links ( dir, on_disk, force_format , hnsw_m ) ?,
513514 entry_points : graph_data. entry_points . into_owned ( ) ,
514515 visited_pool : VisitedPool :: new ( ) ,
515516 } )
516517 }
517518
518- fn load_links ( dir : & Path , on_disk : bool ) -> OperationResult < GraphLinks > {
519- for format in [
519+ fn load_links (
520+ dir : & Path ,
521+ on_disk : bool ,
522+ force_format : Option < & GraphLinksFormatParam > ,
523+ hnsw_m : HnswM ,
524+ ) -> OperationResult < GraphLinks > {
525+ let formats = [
520526 GraphLinksFormat :: CompressedWithVectors ,
521527 GraphLinksFormat :: Compressed ,
522528 GraphLinksFormat :: Plain ,
523- ] {
524- let path = GraphLayers :: get_links_path ( dir, format) ;
525- if path. exists ( ) {
526- return GraphLinks :: load_from_file ( & path, on_disk, format) ;
529+ ] ;
530+
531+ match force_format {
532+ // Normal flow - load the first available format, in order of preference.
533+ None => {
534+ for format in formats {
535+ let path = GraphLayers :: get_links_path ( dir, format) ;
536+ if path. exists ( ) {
537+ return GraphLinks :: load_from_file ( & path, on_disk, format, Advice :: Random ) ;
538+ }
539+ }
527540 }
528- }
529- Err ( OperationError :: service_error ( "No links file found" ) )
530- }
531-
532- /// Convert the "plain" format into the "compressed" format.
533- /// Note: conversion into the "compressed with vectors" format is not
534- /// supported at the moment, though it is possible to implement.
535- /// As far as [`super::hnsw::LINK_COMPRESSION_CONVERT_EXISTING`] is false,
536- /// this code is not used in production.
537- fn convert_to_compressed ( dir : & Path , hnsw_m : HnswM ) -> OperationResult < ( ) > {
538- let plain_path = Self :: get_links_path ( dir, GraphLinksFormat :: Plain ) ;
539- let compressed_path = Self :: get_links_path ( dir, GraphLinksFormat :: Compressed ) ;
540- let compressed_with_vectors_path =
541- Self :: get_links_path ( dir, GraphLinksFormat :: CompressedWithVectors ) ;
542-
543- if compressed_path. exists ( ) || compressed_with_vectors_path. exists ( ) {
544- return Ok ( ( ) ) ;
545- }
541+ // Forced format (tests/benchmarking only) - convert if necessary.
542+ Some ( force_format) => {
543+ // Happy path - the file already exists
544+ let path = GraphLayers :: get_links_path ( dir, force_format. as_format ( ) ) ;
545+ if path. exists ( ) {
546+ return GraphLinks :: load_from_file (
547+ & path,
548+ on_disk,
549+ force_format. as_format ( ) ,
550+ Advice :: Random ,
551+ ) ;
552+ }
546553
547- let start = std:: time:: Instant :: now ( ) ;
548-
549- let links = GraphLinks :: load_from_file ( & plain_path, true , GraphLinksFormat :: Plain ) ?;
550- let original_size = plain_path. metadata ( ) ?. len ( ) ;
551- atomic_save ( & compressed_path, |writer| {
552- let edges = links. to_edges ( ) ;
553- serialize_graph_links ( edges, GraphLinksFormatParam :: Compressed , hnsw_m, writer)
554- } ) ?;
555- let new_size = compressed_path. metadata ( ) ?. len ( ) ;
556-
557- // Remove the original file
558- std:: fs:: remove_file ( plain_path) ?;
559-
560- log:: debug!(
561- "Compressed HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)" ,
562- start. elapsed( ) ,
563- original_size as f64 / 1024.0 / 1024.0 ,
564- new_size as f64 / 1024.0 / 1024.0 ,
565- new_size as f64 / original_size as f64 * 100.0 ,
566- ) ;
554+ // Unhappy path - convert from another format.
555+ for format in formats {
556+ let original_path = GraphLayers :: get_links_path ( dir, format) ;
557+ if format == force_format. as_format ( ) || !original_path. exists ( ) {
558+ continue ;
559+ }
567560
568- Ok ( ( ) )
561+ let start = std:: time:: Instant :: now ( ) ;
562+ let edges = GraphLinks :: load_from_file (
563+ & original_path,
564+ on_disk,
565+ format,
566+ Advice :: Sequential ,
567+ ) ?
568+ . to_edges ( ) ;
569+ let original_size = original_path. metadata ( ) ?. len ( ) ;
570+ atomic_save ( & path, |writer| {
571+ serialize_graph_links ( edges, * force_format, hnsw_m, writer)
572+ } ) ?;
573+ let new_size = path. metadata ( ) ?. len ( ) ;
574+
575+ // NOTE: The original file is not removed.
576+
577+ log:: info!(
578+ "Converted HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)" ,
579+ start. elapsed( ) ,
580+ original_size as f64 / 1024.0 / 1024.0 ,
581+ new_size as f64 / 1024.0 / 1024.0 ,
582+ new_size as f64 / original_size as f64 * 100.0 ,
583+ ) ;
584+
585+ return GraphLinks :: load_from_file (
586+ & path,
587+ on_disk,
588+ force_format. as_format ( ) ,
589+ Advice :: Random ,
590+ ) ;
591+ }
592+ }
593+ }
594+ Err ( OperationError :: service_error ( format ! (
595+ "No HNSW graph links file found in {dir:?}"
596+ ) ) )
569597 }
570598
571599 #[ cfg( feature = "testing" ) ]
@@ -684,12 +712,10 @@ mod tests {
684712 }
685713
686714 #[ rstest]
687- #[ case:: uncompressed( ( GraphLinksFormat :: Plain , false ) ) ]
688- #[ case:: converted( ( GraphLinksFormat :: Plain , true ) ) ]
689- #[ case:: compressed( ( GraphLinksFormat :: Compressed , false ) ) ]
690- #[ case:: recompressed( ( GraphLinksFormat :: Compressed , true ) ) ]
691- #[ case:: compressed_with_vectors( ( GraphLinksFormat :: CompressedWithVectors , false ) ) ]
692- fn test_save_and_load ( #[ case] ( initial_format, compress) : ( GraphLinksFormat , bool ) ) {
715+ #[ case:: uncompressed( GraphLinksFormat :: Plain ) ]
716+ #[ case:: compressed( GraphLinksFormat :: Compressed ) ]
717+ #[ case:: compressed_with_vectors( GraphLinksFormat :: CompressedWithVectors ) ]
718+ fn test_save_and_load ( #[ case] initial_format : GraphLinksFormat ) {
693719 let distance = Distance :: Cosine ;
694720 let num_vectors = 100 ;
695721 let dim = 8 ;
@@ -706,30 +732,45 @@ mod tests {
706732 M ,
707733 dim,
708734 false ,
709- initial_format . is_with_vectors ( ) ,
735+ true ,
710736 distance,
711737 & mut rng,
712738 ) ;
739+ let graph_links_vectors = vector_holder. graph_links_vectors ( ) ;
713740 let graph1 = graph_layers_builder
714741 . into_graph_layers (
715742 dir. path ( ) ,
716- initial_format. with_param_for_tests ( vector_holder . graph_links_vectors ( ) . as_ref ( ) ) ,
743+ initial_format. with_param_for_tests ( graph_links_vectors. as_ref ( ) ) ,
717744 true ,
718745 )
719746 . unwrap ( ) ;
720747 assert_eq ! ( graph1. links. format( ) , initial_format) ;
721748 let res1 = search_in_graph ( & query, top, & vector_holder, & graph1) ;
722749 drop ( graph1) ;
723750
724- let graph2 = GraphLayers :: load ( dir. path ( ) , false , compress) . unwrap ( ) ;
725- if compress {
726- assert_eq ! ( graph2. links. format( ) , GraphLinksFormat :: Compressed ) ;
727- } else {
728- assert_eq ! ( graph2. links. format( ) , initial_format) ;
729- }
730- let res2 = search_in_graph ( & query, top, & vector_holder, & graph2) ;
751+ for force_format in [
752+ None ,
753+ Some ( GraphLinksFormat :: Plain ) ,
754+ Some ( GraphLinksFormat :: Compressed ) ,
755+ Some ( GraphLinksFormat :: CompressedWithVectors ) ,
756+ ] {
757+ eprintln ! ( "force_format = {force_format:?}" ) ;
758+ let graph2 = GraphLayers :: load (
759+ dir. path ( ) ,
760+ false ,
761+ force_format
762+ . map ( |fmt| fmt. with_param ( graph_links_vectors. as_ref ( ) ) )
763+ . as_ref ( ) ,
764+ )
765+ . unwrap ( ) ;
766+ assert_eq ! (
767+ graph2. links. format( ) ,
768+ force_format. unwrap_or( initial_format)
769+ ) ;
770+ let res2 = search_in_graph ( & query, top, & vector_holder, & graph2) ;
731771
732- assert_eq ! ( res1, res2)
772+ assert_eq ! ( res1, res2)
773+ }
733774 }
734775
735776 #[ rstest]
0 commit comments