@@ -7,6 +7,7 @@ use common::fixed_length_priority_queue::FixedLengthPriorityQueue;
77use common:: types:: { PointOffsetType , ScoredPointOffset } ;
88use io:: file_operations:: { atomic_save, read_bin} ;
99use itertools:: Itertools ;
10+ use memory:: madvise:: Advice ;
1011use serde:: { Deserialize , Serialize } ;
1112
1213use super :: HnswM ;
@@ -500,72 +501,97 @@ impl GraphLayers {
500501}
501502
502503impl GraphLayers {
// Diff of `GraphLayers::load`. The `compress: bool` flag (and the up-front call to
// `convert_to_compressed`) is removed; callers now pass
// `force_format: Option<&GraphLinksFormatParam>` instead.
//
// New version, step by step:
//   1. Read the serialized `GraphLayerData` from `GraphLayers::get_path(dir)` via `read_bin`.
//   2. Build `HnswM` once from the stored `m` / `m0` values.
//   3. Delegate link loading to `load_links`, forwarding `force_format` and `hnsw_m`.
//   4. Assemble `Self` with the stored entry points and a fresh `VisitedPool`.
// Errors from `read_bin` and `load_links` are propagated with `?`.
503- pub fn load ( dir : & Path , on_disk : bool , compress : bool ) -> OperationResult < Self > {
504+ pub fn load (
505+ dir : & Path ,
506+ on_disk : bool ,
507+ force_format : Option < & GraphLinksFormatParam > ,
508+ ) -> OperationResult < Self > {
504509 let graph_data: GraphLayerData = read_bin ( & GraphLayers :: get_path ( dir) ) ?;
505-
506- if compress {
507- Self :: convert_to_compressed ( dir, HnswM :: new ( graph_data. m , graph_data. m0 ) ) ?;
508- }
509-
// `hnsw_m` is computed once so it can be both stored in `Self` and handed to `load_links`.
510+ let hnsw_m = HnswM :: new ( graph_data. m , graph_data. m0 ) ;
510511 Ok ( Self {
511- hnsw_m : HnswM :: new ( graph_data . m , graph_data . m0 ) ,
512- links : Self :: load_links ( dir, on_disk) ?,
512+ hnsw_m,
513+ links : Self :: load_links ( dir, on_disk, force_format , hnsw_m ) ?,
513514 entry_points : graph_data. entry_points . into_owned ( ) ,
514515 visited_pool : VisitedPool :: new ( ) ,
515516 } )
516517 }
517518
// Diff of `GraphLayers::load_links`. The removed `convert_to_compressed` helper (the `-` lines
// below) is folded into the new `load_links`, which now also takes `force_format` and `hnsw_m`.
//
// New behaviour:
//   * `force_format == None`: probe the formats in the order CompressedWithVectors,
//     Compressed, Plain and load the first links file that exists.
//   * `force_format == Some(f)`: if the file for `f` already exists, load it. Otherwise take the
//     first *other* format whose file exists, read its edges, serialize them in format `f`
//     through `atomic_save`, log the size change, and load the newly written file.
//     Unlike the removed helper, the source file is NOT deleted after conversion.
//   * If no links file is found in either mode, a "No links file found" service error is returned.
518- fn load_links ( dir : & Path , on_disk : bool ) -> OperationResult < GraphLinks > {
519- for format in [
519+ fn load_links (
520+ dir : & Path ,
521+ on_disk : bool ,
522+ force_format : Option < & GraphLinksFormatParam > ,
523+ hnsw_m : HnswM ,
524+ ) -> OperationResult < GraphLinks > {
525+ let formats = [
520526 GraphLinksFormat :: CompressedWithVectors ,
521527 GraphLinksFormat :: Compressed ,
522528 GraphLinksFormat :: Plain ,
523- ] {
524- let path = GraphLayers :: get_links_path ( dir, format) ;
525- if path. exists ( ) {
526- return GraphLinks :: load_from_file ( & path, on_disk, format) ;
529+ ] ;
530+
531+ match force_format {
532+ // Normal flow - load the first available format, in order of preference.
533+ None => {
534+ for format in formats {
535+ let path = GraphLayers :: get_links_path ( dir, format) ;
536+ if path. exists ( ) {
537+ return GraphLinks :: load_from_file ( & path, on_disk, format, Advice :: Random ) ;
538+ }
539+ }
527540 }
528- }
529- Err ( OperationError :: service_error ( "No links file found" ) )
530- }
531-
532- /// Convert the "plain" format into the "compressed" format.
533- /// Note: conversion into the "compressed with vectors" format is not
534- /// supported at the moment, though it is possible to implement.
535- /// As far as [`super::hnsw::LINK_COMPRESSION_CONVERT_EXISTING`] is false,
536- /// this code is not used in production.
537- fn convert_to_compressed ( dir : & Path , hnsw_m : HnswM ) -> OperationResult < ( ) > {
538- let plain_path = Self :: get_links_path ( dir, GraphLinksFormat :: Plain ) ;
539- let compressed_path = Self :: get_links_path ( dir, GraphLinksFormat :: Compressed ) ;
540- let compressed_with_vectors_path =
541- Self :: get_links_path ( dir, GraphLinksFormat :: CompressedWithVectors ) ;
542-
543- if compressed_path. exists ( ) || compressed_with_vectors_path. exists ( ) {
544- return Ok ( ( ) ) ;
545- }
541+ // Forced format (tests/benchmarking only) - convert if necessary.
542+ Some ( force_format) => {
543+ // Happy path - the file already exists
544+ let path = GraphLayers :: get_links_path ( dir, force_format. as_format ( ) ) ;
545+ if path. exists ( ) {
546+ return GraphLinks :: load_from_file (
547+ & path,
548+ on_disk,
549+ force_format. as_format ( ) ,
550+ Advice :: Random ,
551+ ) ;
552+ }
546553
547- let start = std:: time:: Instant :: now ( ) ;
548-
549- let links = GraphLinks :: load_from_file ( & plain_path, true , GraphLinksFormat :: Plain ) ?;
550- let original_size = plain_path. metadata ( ) ?. len ( ) ;
551- atomic_save ( & compressed_path, |writer| {
552- let edges = links. to_edges ( ) ;
553- serialize_graph_links ( edges, GraphLinksFormatParam :: Compressed , hnsw_m, writer)
554- } ) ?;
555- let new_size = compressed_path. metadata ( ) ?. len ( ) ;
556-
557- // Remove the original file
558- std:: fs:: remove_file ( plain_path) ?;
559-
560- log:: debug!(
561- "Compressed HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)" ,
562- start. elapsed( ) ,
563- original_size as f64 / 1024.0 / 1024.0 ,
564- new_size as f64 / 1024.0 / 1024.0 ,
565- new_size as f64 / original_size as f64 * 100.0 ,
566- ) ;
554+ // Unhappy path - convert from another format.
555+ for format in formats {
556+ let original_path = GraphLayers :: get_links_path ( dir, format) ;
557+ if format == force_format. as_format ( ) || !original_path. exists ( ) {
558+ continue ;
559+ }
567560
568- Ok ( ( ) )
// The conversion source is opened with `Advice::Sequential`, while every graph that is
// actually returned uses `Advice::Random`.
// NOTE(review): presumably because the source is read once front to back - confirm.
561+ let start = std:: time:: Instant :: now ( ) ;
562+ let edges = GraphLinks :: load_from_file (
563+ & original_path,
564+ on_disk,
565+ format,
566+ Advice :: Sequential ,
567+ ) ?
568+ . to_edges ( ) ;
569+ let original_size = original_path. metadata ( ) ?. len ( ) ;
570+ atomic_save ( & path, |writer| {
571+ serialize_graph_links ( edges, * force_format, hnsw_m, writer)
572+ } ) ?;
573+ let new_size = path. metadata ( ) ?. len ( ) ;
574+
575+ // NOTE: The original file is not removed.
576+
577+ log:: debug!(
578+ "Converted HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)" ,
579+ start. elapsed( ) ,
580+ original_size as f64 / 1024.0 / 1024.0 ,
581+ new_size as f64 / 1024.0 / 1024.0 ,
582+ new_size as f64 / original_size as f64 * 100.0 ,
583+ ) ;
584+
// Only the first convertible source is used: the loop returns right after conversion.
585+ return GraphLinks :: load_from_file (
586+ & path,
587+ on_disk,
588+ force_format. as_format ( ) ,
589+ Advice :: Random ,
590+ ) ;
591+ }
592+ }
593+ }
// Reached only when no candidate links file exists (in either mode).
594+ Err ( OperationError :: service_error ( "No links file found" ) )
569595 }
570596
571597 #[ cfg( feature = "testing" ) ]
@@ -684,12 +710,10 @@ mod tests {
684710 }
685711
686712 #[ rstest]
687- #[ case:: uncompressed( ( GraphLinksFormat :: Plain , false ) ) ]
688- #[ case:: converted( ( GraphLinksFormat :: Plain , true ) ) ]
689- #[ case:: compressed( ( GraphLinksFormat :: Compressed , false ) ) ]
690- #[ case:: recompressed( ( GraphLinksFormat :: Compressed , true ) ) ]
691- #[ case:: compressed_with_vectors( ( GraphLinksFormat :: CompressedWithVectors , false ) ) ]
692- fn test_save_and_load ( #[ case] ( initial_format, compress) : ( GraphLinksFormat , bool ) ) {
713+ #[ case:: uncompressed( GraphLinksFormat :: Plain ) ]
714+ #[ case:: compressed( GraphLinksFormat :: Compressed ) ]
715+ #[ case:: compressed_with_vectors( GraphLinksFormat :: CompressedWithVectors ) ]
716+ fn test_save_and_load ( #[ case] initial_format : GraphLinksFormat ) {
693717 let distance = Distance :: Cosine ;
694718 let num_vectors = 100 ;
695719 let dim = 8 ;
@@ -706,30 +730,45 @@ mod tests {
706730 M ,
707731 dim,
708732 false ,
709- initial_format . is_with_vectors ( ) ,
733+ true ,
710734 distance,
711735 & mut rng,
712736 ) ;
737+ let graph_links_vectors = vector_holder. graph_links_vectors ( ) ;
713738 let graph1 = graph_layers_builder
714739 . into_graph_layers (
715740 dir. path ( ) ,
716- initial_format. with_param_for_tests ( vector_holder . graph_links_vectors ( ) . as_ref ( ) ) ,
741+ initial_format. with_param_for_tests ( graph_links_vectors. as_ref ( ) ) ,
717742 true ,
718743 )
719744 . unwrap ( ) ;
720745 assert_eq ! ( graph1. links. format( ) , initial_format) ;
721746 let res1 = search_in_graph ( & query, top, & vector_holder, & graph1) ;
722747 drop ( graph1) ;
723748
724- let graph2 = GraphLayers :: load ( dir. path ( ) , false , compress) . unwrap ( ) ;
725- if compress {
726- assert_eq ! ( graph2. links. format( ) , GraphLinksFormat :: Compressed ) ;
727- } else {
728- assert_eq ! ( graph2. links. format( ) , initial_format) ;
729- }
730- let res2 = search_in_graph ( & query, top, & vector_holder, & graph2) ;
749+ for force_format in [
750+ None ,
751+ Some ( GraphLinksFormat :: Plain ) ,
752+ Some ( GraphLinksFormat :: Compressed ) ,
753+ Some ( GraphLinksFormat :: CompressedWithVectors ) ,
754+ ] {
755+ eprintln ! ( "force_format = {force_format:?}" ) ;
756+ let graph2 = GraphLayers :: load (
757+ dir. path ( ) ,
758+ false ,
759+ force_format
760+ . map ( |fmt| fmt. with_param ( graph_links_vectors. as_ref ( ) ) )
761+ . as_ref ( ) ,
762+ )
763+ . unwrap ( ) ;
764+ assert_eq ! (
765+ graph2. links. format( ) ,
766+ force_format. unwrap_or( initial_format)
767+ ) ;
768+ let res2 = search_in_graph ( & query, top, & vector_holder, & graph2) ;
731769
732- assert_eq ! ( res1, res2)
770+ assert_eq ! ( res1, res2)
771+ }
733772 }
734773
735774 #[ rstest]
0 commit comments