Skip to content

Commit 442e8e1

Browse files
feat: add --unordered to forest-cli snapshot export (#5867)
Co-authored-by: Aryan Tikarya <[email protected]>
1 parent 82f70a2 commit 442e8e1

10 files changed

Lines changed: 268 additions & 158 deletions

File tree

.github/workflows/forest.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,28 @@ jobs:
306306
- name: Snapshot export check v2
307307
run: ./scripts/tests/calibnet_export_f3_check.sh
308308
timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }}
309+
calibnet-unordered-export-check:
310+
needs:
311+
- build-ubuntu
312+
name: Snapshot unordered export checks
313+
runs-on: ubuntu-24.04
314+
steps:
315+
- run: lscpu
316+
- uses: actions/cache@v4
317+
with:
318+
path: "${{ env.FIL_PROOFS_PARAMETER_CACHE }}"
319+
key: proof-params-keys
320+
- uses: actions/checkout@v4
321+
- uses: actions/download-artifact@v4
322+
with:
323+
name: "forest-${{ runner.os }}"
324+
path: ~/.cargo/bin
325+
- name: Set permissions
326+
run: |
327+
chmod +x ~/.cargo/bin/forest*
328+
- name: Snapshot unordered export check
329+
run: ./scripts/tests/calibnet_export_unordered_check.sh
330+
timeout-minutes: ${{ fromJSON(env.SCRIPT_TIMEOUT_MINUTES) }}
309331
calibnet-no-discovery-checks:
310332
needs:
311333
- build-ubuntu
@@ -577,6 +599,7 @@ jobs:
577599
- calibnet-wallet-check
578600
- calibnet-export-check
579601
- calibnet-export-check-v2
602+
- calibnet-unordered-export-check
580603
- calibnet-no-discovery-checks
581604
- calibnet-kademlia-checks
582605
- calibnet-eth-mapping-check

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535

3636
- [#5859](https://github.com/ChainSafe/forest/pull/5859) Added size metrics for zstd frame cache and made max size configurable via `FOREST_ZSTD_FRAME_CACHE_DEFAULT_MAX_SIZE` environment variable.
3737

38+
- [#5867](https://github.com/ChainSafe/forest/pull/5867) Added `--unordered` to `forest-cli snapshot export` for exporting `CAR` blocks in non-deterministic order for better performance with more parallelization.
39+
3840
- [#5886](https://github.com/ChainSafe/forest/issues/5886) Add `forest-tool archive merge-f3` subcommand for merging a v1 Filecoin snapshot and an F3 snapshot into a v2 Filecoin snapshot.
3941

4042
- [#4976](https://github.com/ChainSafe/forest/issues/4976) Add support for the `Filecoin.EthSubscribe` and `Filecoin.EthUnsubscribe` API methods to enable subscriptions to Ethereum event types: `heads` and `logs`.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env bash
2+
# This script is checking the correctness of
3+
# the snapshot export feature.
4+
# It requires both the `forest` and `forest-cli` binaries to be in the PATH.
5+
6+
set -eu
7+
8+
source "$(dirname "$0")/harness.sh"
9+
10+
forest_init "$@"
11+
12+
echo "Cleaning up the initial snapshot"
13+
rm --force --verbose ./*.{car,car.zst,sha256sum}
14+
15+
echo "Exporting zstd compressed snapshot with unordred graph traversal"
16+
$FOREST_CLI_PATH snapshot export --unordered -o unordered.forest.car.zst
17+
18+
$FOREST_CLI_PATH shutdown --force
19+
20+
for f in *.car.zst; do
21+
echo "Inspecting archive info $f"
22+
$FOREST_TOOL_PATH archive info "$f"
23+
echo "Inspecting archive metadata $f"
24+
$FOREST_TOOL_PATH archive metadata "$f"
25+
done
26+
27+
echo "Cleanup calibnet db"
28+
$FOREST_TOOL_PATH db destroy --chain calibnet --force
29+
30+
echo "Import the unordered snapshot"
31+
$FOREST_PATH --chain calibnet --encrypt-keystore false --halt-after-import --height=-100 --import-snapshot unordered.forest.car.zst
32+
33+
echo "Check if Forest is able to sync"
34+
forest_run_node_detached
35+
forest_wait_api
36+
forest_wait_for_sync
37+
forest_wait_for_healthcheck_ready

src/chain/mod.rs

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::blocks::{Tipset, TipsetKey};
1313
use crate::cid_collections::CidHashSet;
1414
use crate::db::car::forest::{self, ForestCarFrame, finalize_frame};
1515
use crate::db::{SettingsStore, SettingsStoreExt};
16-
use crate::ipld::stream_chain;
16+
use crate::ipld::{stream_chain, unordered_stream_chain};
1717
use crate::utils::db::car_stream::{CarBlock, CarBlockWrite};
1818
use crate::utils::io::{AsyncWriterWithChecksum, Checksum};
1919
use crate::utils::multihash::MultihashCode;
@@ -31,17 +31,23 @@ use std::io::{Seek as _, SeekFrom};
3131
use std::sync::Arc;
3232
use tokio::io::{AsyncWrite, AsyncWriteExt, BufWriter};
3333

34+
#[derive(Debug, Clone, Default)]
35+
pub struct ExportOptions {
36+
pub skip_checksum: bool,
37+
pub unordered: bool,
38+
pub seen: CidHashSet,
39+
}
40+
3441
pub async fn export_from_head<D: Digest>(
3542
db: &Arc<impl Blockstore + SettingsStore + Send + Sync + 'static>,
3643
lookup_depth: ChainEpochDelta,
3744
writer: impl AsyncWrite + Unpin,
38-
seen: CidHashSet,
39-
skip_checksum: bool,
45+
options: Option<ExportOptions>,
4046
) -> anyhow::Result<(Tipset, Option<digest::Output<D>>)> {
4147
let head_key = SettingsStoreExt::read_obj::<TipsetKey>(db, crate::db::setting_keys::HEAD_KEY)?
4248
.context("chain head key not found")?;
4349
let head_ts = Tipset::load_required(&db, &head_key)?;
44-
let digest = export::<D>(db, &head_ts, lookup_depth, writer, seen, skip_checksum).await?;
50+
let digest = export::<D>(db, &head_ts, lookup_depth, writer, options).await?;
4551
Ok((head_ts, digest))
4652
}
4753

@@ -52,21 +58,10 @@ pub async fn export<D: Digest>(
5258
tipset: &Tipset,
5359
lookup_depth: ChainEpochDelta,
5460
writer: impl AsyncWrite + Unpin,
55-
seen: CidHashSet,
56-
skip_checksum: bool,
61+
options: Option<ExportOptions>,
5762
) -> anyhow::Result<Option<digest::Output<D>>> {
5863
let roots = tipset.key().to_cids();
59-
export_to_forest_car::<D>(
60-
roots,
61-
None,
62-
db,
63-
tipset,
64-
lookup_depth,
65-
writer,
66-
seen,
67-
skip_checksum,
68-
)
69-
.await
64+
export_to_forest_car::<D>(roots, None, db, tipset, lookup_depth, writer, options).await
7065
}
7166

7267
/// Exports a Filecoin snapshot in v2 format
@@ -77,8 +72,7 @@ pub async fn export_v2<D: Digest>(
7772
tipset: &Tipset,
7873
lookup_depth: ChainEpochDelta,
7974
writer: impl AsyncWrite + Unpin,
80-
seen: CidHashSet,
81-
skip_checksum: bool,
75+
options: Option<ExportOptions>,
8276
) -> anyhow::Result<Option<digest::Output<D>>> {
8377
// validate f3 data
8478
if let Some((f3_cid, f3_data)) = &mut f3 {
@@ -131,8 +125,7 @@ pub async fn export_v2<D: Digest>(
131125
tipset,
132126
lookup_depth,
133127
writer,
134-
seen,
135-
skip_checksum,
128+
options,
136129
)
137130
.await
138131
}
@@ -145,9 +138,14 @@ async fn export_to_forest_car<D: Digest>(
145138
tipset: &Tipset,
146139
lookup_depth: ChainEpochDelta,
147140
writer: impl AsyncWrite + Unpin,
148-
seen: CidHashSet,
149-
skip_checksum: bool,
141+
options: Option<ExportOptions>,
150142
) -> anyhow::Result<Option<digest::Output<D>>> {
143+
let ExportOptions {
144+
skip_checksum,
145+
unordered,
146+
seen,
147+
} = options.unwrap_or_default();
148+
151149
let stateroot_lookup_limit = tipset.epoch() - lookup_depth;
152150

153151
// Wrap writer in optional checksum calculator
@@ -160,12 +158,25 @@ async fn export_to_forest_car<D: Digest>(
160158
// are small enough that keeping 1k in memory isn't a problem. Average
161159
// block size is between 1kb and 2kb.
162160
1024,
163-
stream_chain(
164-
Arc::clone(db),
165-
tipset.clone().chain_owned(Arc::clone(db)),
166-
stateroot_lookup_limit,
167-
)
168-
.with_seen(seen),
161+
if unordered {
162+
futures::future::Either::Left(
163+
unordered_stream_chain(
164+
Arc::clone(db),
165+
tipset.clone().chain_owned(Arc::clone(db)),
166+
stateroot_lookup_limit,
167+
)
168+
.with_seen(seen),
169+
)
170+
} else {
171+
futures::future::Either::Right(
172+
stream_chain(
173+
Arc::clone(db),
174+
tipset.clone().chain_owned(Arc::clone(db)),
175+
stateroot_lookup_limit,
176+
)
177+
.with_seen(seen),
178+
)
179+
},
169180
);
170181

171182
// Encode Ipld key-value pairs in zstd frames

src/chain/tests.rs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,10 @@ async fn test_export_inner(version: FilecoinSnapshotVersion) -> anyhow::Result<(
5959

6060
let checksum = match version {
6161
FilecoinSnapshotVersion::V1 => {
62-
export::<Sha256>(&db, &head, 0, &mut car_bytes, Default::default(), false).await?
62+
export::<Sha256>(&db, &head, 0, &mut car_bytes, None).await?
6363
}
6464
FilecoinSnapshotVersion::V2 => {
65-
export_v2::<Sha256>(
66-
&db,
67-
None,
68-
&head,
69-
0,
70-
&mut car_bytes,
71-
Default::default(),
72-
false,
73-
)
74-
.await?
65+
export_v2::<Sha256>(&db, None, &head, 0, &mut car_bytes, None).await?
7566
}
7667
};
7768

src/cli/subcommands/snapshot_cmd.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ pub enum SnapshotCommands {
3636
/// How many state-roots to include. Lower limit is 900 for `calibnet` and `mainnet`.
3737
#[arg(short, long)]
3838
depth: Option<crate::chain::ChainEpochDelta>,
39+
/// Traverse chain in non-deterministic order for better performance with more parallelization.
40+
#[arg(long)]
41+
unordered: bool,
3942
/// Export snapshot in the experimental v2 format(FRC-0108).
4043
#[arg(long, value_enum, default_value_t = FilecoinSnapshotVersion::V1)]
4144
format: FilecoinSnapshotVersion,
@@ -51,6 +54,7 @@ impl SnapshotCommands {
5154
dry_run,
5255
tipset,
5356
depth,
57+
unordered,
5458
format,
5559
} => {
5660
let chain_head = ChainHead::call(&client, ()).await?;
@@ -93,6 +97,7 @@ impl SnapshotCommands {
9397
recent_roots: depth.unwrap_or(SyncConfig::default().recent_state_roots),
9498
output_path: temp_path.to_path_buf(),
9599
tipset_keys: ApiTipsetKey(Some(chain_head.key().clone())),
100+
unordered,
96101
skip_checksum,
97102
dry_run,
98103
};
@@ -128,10 +133,12 @@ impl SnapshotCommands {
128133
pb.finish();
129134
_ = handle.await;
130135

131-
if let Some(hash) = hash_result {
132-
save_checksum(&output_path, hash).await?;
136+
if !dry_run {
137+
if let Some(hash) = hash_result {
138+
save_checksum(&output_path, hash).await?;
139+
}
140+
temp_path.persist(output_path)?;
133141
}
134-
temp_path.persist(output_path)?;
135142

136143
println!("Export completed.");
137144
Ok(())

src/db/gc/snapshot.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
//!
4444
4545
use crate::blocks::{Tipset, TipsetKey};
46-
use crate::cid_collections::CidHashSet;
46+
use crate::chain::ExportOptions;
4747
use crate::cli_shared::chain_path;
4848
use crate::db::car::forest::new_forest_car_temp_path_in;
4949
use crate::db::{
@@ -223,8 +223,10 @@ where
223223
&db,
224224
self.recent_state_roots,
225225
file,
226-
CidHashSet::default(),
227-
true,
226+
Some(ExportOptions {
227+
skip_checksum: true,
228+
..Default::default()
229+
}),
228230
)
229231
.await?;
230232
let target_path = self.car_db_dir.join(format!(

0 commit comments

Comments
 (0)