Find duplicates in a GISAID database dump.
This is a small utility that lets you search for entries with duplicate names in a GISAID NDJSON dump file.
# Clone the git repository and change into its directory
git clone https://github.com/nextstrain/gisaid-dupes
cd gisaid-dupes
# Install Rustup, the Rust version manager (https://www.rust-lang.org/tools/install).
# The installer script is fetched over HTTPS only, with TLS 1.2 or newer, and piped to `sh`;
# `-sSf` keeps curl quiet while still reporting errors and failing on HTTP error responses.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Add Rust tools to the $PATH of the current shell session
# ($HOME/.cargo/bin is appended, so existing $PATH entries take precedence)
export PATH="$PATH:$HOME/.cargo/bin"
# Run in release mode (slow to build, fast to run).
# Everything after the bare `--` is passed to gisaid-dupes itself rather than to cargo:
# presumably gisaid.ndjson.zst is the input dump and `-o gisaid.dupes.csv` the output file
# (verify against the tool's --help).
cargo run --release --bin=gisaid-dupes -- gisaid.ndjson.zst -o gisaid.dupes.csv --verbose
# Alternatively, run in debug mode (fast to build, slow to run), with the same program arguments
cargo run --bin=gisaid-dupes -- gisaid.ndjson.zst -o gisaid.dupes.csv --verbose