NVIDIA-NeMo
diff --git a/‎.github/workflows/cicd-main.yml‎
Lines changed: 14 additions & 28 deletions b/‎.github/workflows/cicd-main.yml‎
Lines changed: 14 additions & 28 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 6 additions & 0 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎CONTRIBUTING.md‎
Lines changed: 108 additions & 19 deletions b/‎CONTRIBUTING.md‎
Lines changed: 108 additions & 19 deletions
diff --git a/‎README.md‎
Lines changed: 10 additions & 7 deletions b/‎README.md‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎docs/nsys-profiling.md‎
Lines changed: 4 additions & 11 deletions b/‎docs/nsys-profiling.md‎
Lines changed: 4 additions & 11 deletions
diff --git a/‎docs/testing.md‎
Lines changed: 11 additions & 6 deletions b/‎docs/testing.md‎
Lines changed: 11 additions & 6 deletions
diff --git a/‎examples/configs/dpo.yaml‎
Lines changed: 1 addition & 0 deletions b/‎examples/configs/dpo.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml‎
Lines changed: 1 addition & 0 deletions b/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml‎
Lines changed: 1 addition & 0 deletions b/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml‎
Lines changed: 1 addition & 0 deletions b/‎examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml‎
Lines changed: 1 addition & 0 deletions
@@ -139,31 +139,18 @@ jobs:
           uv venv
           uv run --group dev pre-commit install
           uv run --group dev pre-commit run --all-files --show-diff-on-failure --color=always
-      - name: Minimize uv cache
-        run: uv cache prune --ci
-
-  mypy-check:
-    name: Mypy check
-    needs: [pre-flight]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          version: "0.7.2"
-          enable-cache: true
-          prune-cache: false
-      # Faster than uv python install since it caches python alongside runner
-      - name: "Set up Python"
-        uses: actions/setup-python@v5
-        with:
-          python-version-file: ".python-version"
-      - name: Check mypy
+      # TODO: this is a temporary check and should be removed once we have 100% correctness
+      - name: Check if any files with zero errors not in whitelist
         run: |
+          missing_count=0  # number of error-free files that are absent from pyrefly.toml
+          for file in $(uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . as $file | if ($error_counts[$file] // 0) == 0 then $file else empty end'); do
+            if ! grep -qF -- "$file" pyrefly.toml; then  # fixed-string match; `fgrep` is obsolescent in GNU grep
+              echo "File $file has zero errors but is not in pyrefly.toml in the 'project-includes' list. Please add it to this whitelist."
+              missing_count=$((missing_count + 1))  # not ((missing_count++)): that returns status 1 when the count is 0 and would abort a `bash -e` step after the first file
+            fi
+          done
+
+          exit $((missing_count > 0 ? 1 : 0))  # exit statuses wrap modulo 256, so never exit with the raw count
       - name: Minimize uv cache
         run: uv cache prune --ci
 
@@ -221,8 +208,8 @@ jobs:
       UNIT_TEST_SCRIPT: |
         cd /opt/nemo-rl
         if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then
-          uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl -m \"not mcore\"
-          uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json -m mcore
+          uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --hf-gated
+          uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only
         else
           echo Skipping unit tests for docs-only level
         fi
@@ -319,8 +306,7 @@ jobs:
                 (
                   needs.pre-flight.outputs.test_level != 'none' &&
                   needs.sphinx-build.result == 'success' &&
-                  needs.tests.result == 'success' &&
-                  (needs.mypy-check.result == 'success' || true)
+                  needs.tests.result == 'success'
                 )
               )
             }}
 
@@ -35,3 +35,9 @@ repos:
         files: '.*\/[^\/]*_[^\/]*\.md$'
         exclude: '^\.github/'
         types: [file]
+
+  - repo: https://github.com/facebook/pyrefly
+    rev: '0.24.2'  # pinned hook version
+    hooks:
+      - id: pyrefly-typecheck
+        files: '\.py$'  # only run on Python sources
@@ -7,53 +7,142 @@ Thanks for your interest in contributing to Nemo-RL!
 ### Development Environment
 
 1. **Build and run the Docker container**:
-```bash
-docker buildx build -t nemo-rl -f Dockerfile .
+```sh
+docker buildx build -t nemo-rl:latest -f Dockerfile .
+```
+
+To start a shell in the container to interactively run/develop:
+```sh
 # Run the container with your local nemo-rl directory mounted
-docker run -it --gpus all -v /path/to/nemo-rl:/workspace/nemo-rl nemo-rl
+docker run -it --gpus all -v /path/to/nemo-rl:/nemo-rl nemo-rl:latest
+```
+
+If you are using VSCode/Cursor, you can also use Dev Containers. Here's a `devcontainer.json` to get you started:
+```jsonc
+{
+    "name": "rl-dev",
+    "image": "nemo-rl:latest",
+    "runArgs": [
+        "--gpus",
+        "all",
+        "--ulimit",
+        "memlock=-1",
+        "--ulimit",
+        "stack=67108864",
+        "--shm-size=24g",
+        "--privileged",
+        "--pid=host"
+    ]
+
+    // NOTE: Here is an example of how you can set up some common mounts, environment variables, and set up your shell.
+    //       Feel free to adapt to your development workflow and remember to replace the user `terryk` with your username.
+
+    //"mounts": [
+    //    {"source": "/home/terryk", "target": "/home/terryk", "type": "bind"},
+    //    {"source": "/home/terryk/.ssh", "target": "/root/terryk-ssh", "type": "bind"}
+    //],
+    //"containerEnv": {
+    //    "HF_TOKEN_PATH": "/home/terryk/.cache/huggingface/token",
+    //    "HF_HOME": "/home/terryk/.cache/huggingface",
+    //    "HF_DATASETS_CACHE": "/home/terryk/.cache/huggingface/datasets",
+    //    "WANDB_API_KEY": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+    //},
+    // // This (1) marks all directories safe (2) copies in ssh keys (3) sources user's bashrc file
+    //"postStartCommand": "git config --global --add safe.directory '*' && cp -r /root/terryk-ssh/* /root/.ssh/ && source /home/terryk/.bashrc"
+}
 ```
 
 ## Making Changes
 
-### Workflow: Clone and Branch (No Fork Required)
+### Workflow: For External Contributors (Fork Required)
 
 #### Before You Start: Install pre-commit
 
-From the [`nemo-rl` root directory](.), run:
-```bash
-python3 -m pip install pre-commit
-pre-commit install
-```
+Pre-commit checks (using `ruff`/`pyrefly`) will help ensure your code follows our formatting and style guidelines.
 
-Pre-commit checks (using `ruff`) will help ensure your code follows our formatting and style guidelines.
+If you're an external contributor, you'll need to fork the repository:
 
-We follow a direct clone and branch workflow for now:
+1. **Create a fork**: Click the "Fork" button on the [GitHub repository page](https://github.com/NVIDIA-NeMo/RL) or follow this direct link: https://github.com/NVIDIA-NeMo/RL/fork
 
-1. Clone the repository directly:
+2. **Clone your fork**:
    ```bash
-   git clone https://github.com/NVIDIA-NeMo/RL
+   git clone https://github.com/YOUR-USERNAME/RL nemo-rl
    cd nemo-rl
    ```
 
-2. Create a new branch for your changes:
+3. **Add upstream remote** to keep your fork updated:
    ```bash
-   git checkout -b your-feature-name
+   git remote add upstream https://github.com/NVIDIA-NeMo/RL.git
    ```
 
-3. Make your changes and commit them:
+4. **Install pre-commit**:
+   ```bash
+   # Requires `uv` to be installed
+   uv run --group dev pre-commit install
+   ```
+
+5. **Keep your fork updated** before starting new work:
+   ```bash
+   git fetch upstream
+   git checkout main
+   git merge upstream/main
+   git push origin main
+   ```
+
+6. **Create a new branch** for your changes:
+   ```bash
+   git checkout main
+   git switch -c your-feature-name
+   ```
+
+7. **Make your changes and commit** them:
    ```bash
    git add .
    git commit --signoff -m "Your descriptive commit message"
    ```
 
 We require signing commits with `--signoff` (or `-s` for short). See [Signing Your Work](#signing-your-work) for details.
 
-4. Push your branch to the repository:
+8. **Push to your fork**:
+   ```bash
+   git push origin your-feature-name
+   ```
+
+9. **Create a pull request** from your fork's branch to the main repository's `main` branch through the GitHub web interface. For example, if your GitHub username is `terrykong` and your feature branch is `your-feature-name`, the compare URL would look like: https://github.com/NVIDIA-NeMo/RL/compare/main...terrykong:RL:your-feature-name?expand=1
+
+### Workflow: For NVIDIA Contributors (Direct Access)
+
+If you have write access to the repository (NVIDIA contributors):
+
+1. Clone the repository directly:
+   ```bash
+   git clone https://github.com/NVIDIA-NeMo/RL nemo-rl
+   cd nemo-rl
+   ```
+
+2. **Install pre-commit** from the [`nemo-rl` root directory](.):
+   ```bash
+   # Requires `uv` to be installed
+   uv run --group dev pre-commit install
+   ```
+
+3. Create a new branch for your changes:
+   ```bash
+   git switch -c your-feature-name
+   ```
+
+4. Make your changes and commit them:
+   ```bash
+   git add .
+   git commit --signoff -m "Your descriptive commit message"
+   ```
+
+5. Push your branch to the repository:
    ```bash
-   git push origin feature/your-feature-name
+   git push origin your-feature-name
    ```
 
-5. Create a pull request from your branch to the `main` branch.
+6. Create a pull request from your branch to the `main` branch.
 
 ### Design Documentation Requirement
 
 
@@ -1,8 +1,18 @@
 # Nemo RL: A Scalable and Efficient Post-Training Library
 
+## 📣 News
+* [7/25/2025] [Release v0.3.0!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.3.0)
+    * 📝 [v0.3.0 Blog Post](https://nvidia-nemo.github.io/blog/2025/07/21/nemo-rl-v0.3/)
+    * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/15kpesCV1m_C5UQFStssTEjaN2RsBMeZ0?usp=sharing) to get a head start on your experimentation.
+* [5/14/2025] [Reproduce DeepScaleR with NeMo RL!](docs/guides/grpo-deepscaler.md)
+* [5/14/2025] [Release v0.2.1!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.2.1)
+    * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/1o14sO0gj_Tl_ZXGsoYip3C0r5ofkU1Ey?usp=sharing) to get a head start on your experimentation.
+
+## Table of Contents
 <!-- markdown all in one -->
 - [Nemo RL: A Scalable and Efficient Post-Training Library](#nemo-rl-a-scalable-and-efficient-post-training-library)
   - [📣 News](#-news)
+  - [Table of Contents](#table-of-contents)
   - [Features](#features)
   - [Prerequisites](#prerequisites)
   - [Training Backends](#training-backends)
@@ -36,13 +46,6 @@ What you can expect:
 - **Flexibility** with a modular design that allows easy integration and customization.
 - **Comprehensive documentation** that is both detailed and user-friendly, with practical examples.
 
-## 📣 News
-* [7/25/2025] [Release v0.3.0!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.3.0)
-    * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/15kpesCV1m_C5UQFStssTEjaN2RsBMeZ0?usp=sharing) to get a head start on your experimentation.
-* [5/14/2025] [Reproduce DeepscaleR with NeMo RL!](docs/guides/grpo-deepscaler.md)
-* [5/14/2025] [Release v0.2.1!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.2.1)
-    * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/1o14sO0gj_Tl_ZXGsoYip3C0r5ofkU1Ey?usp=sharing) to get a head start on your experimentation.
-
 ## Features
 
 ✅ _Available now_ | 🔜 _Coming in v0.4_
 
@@ -17,7 +17,7 @@ NeMo RL supports Nsight profiling for Ray workers through environment variable p
 Set the `NRL_NSYS_WORKER_PATTERNS` environment variable with a comma-separated list of patterns to match worker names:
 
 ```bash
-export NRL_NSYS_WORKER_PATTERNS="*policy*,*vllm*"
+export NRL_NSYS_WORKER_PATTERNS="*policy*,*other-worker*"
 ```
 
 Set the `NRL_NSYS_PROFILE_STEP_RANGE` environment variable to control which training steps the profiler captures. Its
@@ -40,7 +40,7 @@ export NRL_NSYS_PROFILE_STEP_RANGE=3:5
 
 The supported worker types are:
 - **DTensorPolicyWorker**: Pattern matched against `"dtensor_policy_worker"`
-- **VllmGenerationWorker**: Pattern matched against `"vllm_generation_worker"`
+- **MegatronPolicyWorker**: Pattern matched against `"megatron_policy_worker"`
 
 ## Example Usage
 
@@ -49,16 +49,10 @@ The supported worker types are:
 NRL_NSYS_PROFILE_STEP_RANGE=2:3 NRL_NSYS_WORKER_PATTERNS="*policy*" uv run examples/run_grpo_math.py grpo.max_num_steps=5
 ```
 
-### Profile Multiple Worker Types
-
-```bash
-NRL_NSYS_PROFILE_STEP_RANGE=1:2 NRL_NSYS_WORKER_PATTERNS="*policy*,*vllm*" uv run examples/run_grpo_math.py grpo.max_num_steps=5
-```
-
 ### Profile Workers with Exact Names
 
 ```bash
-NRL_NSYS_PROFILE_STEP_RANGE=3:10 NRL_NSYS_WORKER_PATTERNS="dtensor_policy_worker,vllm_generation_worker" uv run examples/run_grpo_math.py grpo.max_num_steps=5
+NRL_NSYS_PROFILE_STEP_RANGE=3:10 NRL_NSYS_WORKER_PATTERNS="dtensor_policy_worker" uv run examples/run_grpo_math.py grpo.max_num_steps=5
 ```
 
 ### Profile Megatron Workers
@@ -69,7 +63,7 @@ To profile a Megatron worker, you should set `LD_LIBRARY_PATH` as follows, other
 
 ```bash
 LD_LIBRARY_PATH="/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/lib/x86_64-linux-gnu" \
-NRL_NSYS_PROFILE_STEP_RANGE=2:3 NRL_NSYS_WORKER_PATTERNS="megatron_policy_worker,vllm_generation_worker" uv run examples/run_grpo_math.py --config examples/configs/grpo_math_1B_megatron.yaml grpo.max_num_steps=5
+NRL_NSYS_PROFILE_STEP_RANGE=2:3 NRL_NSYS_WORKER_PATTERNS="megatron_policy_worker" uv run examples/run_grpo_math.py --config examples/configs/grpo_math_1B_megatron.yaml grpo.max_num_steps=5
 ```
 
 ## Profile Output
@@ -84,7 +78,6 @@ When profiling is enabled, it generates the following logs and files:
 2. **Profile Files**: Each profiled worker generates a `.nsys-rep` file with naming pattern:
    ```
    dtensor_policy_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep
-   vllm_generation_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep
    ```
 
 3. **File Location**: Profile files are saved in `/tmp/ray/session*/logs/nsight/` directory on each worker node.
 
@@ -10,14 +10,19 @@ Unit tests require 2 GPUs to test the full suite.
 
 ```sh
 # Run the unit tests using local GPUs
+
+# Configuration 1: Default tests only - excludes both hf_gated and mcore tests
 uv run --group test bash tests/run_unit.sh
-```
 
-:::{note}
-Tests can also be run on Slurm with `ray.sub`, but note that some tests will be skipped
-due to no GPUs being located on the head node. To run the full suite of tests, please
-launch on a regular GPU allocation.
-:::
+# Configuration 2: Default + HF gated tests, excluding mcore tests
+uv run --group test bash tests/run_unit.sh --hf-gated
+
+# Configuration 3: ONLY mcore tests, excluding ones with hf_gated
+uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only
+
+# Configuration 4: ONLY mcore tests, including ones with hf_gated
+uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only --hf-gated
+```
 
 ### Run Unit Tests in a Hermetic Environment
 
 
@@ -156,6 +156,7 @@ logger:
   tensorboard_enabled: false
   mlflow_enabled: false  # Disable MLflow logging
   monitor_gpus: true  # If true, will monitor GPU usage and log to wandb and/or tensorboard
+  num_val_samples_to_print: 0  # Number of validation samples to pretty print on terminal
   wandb:
     project: "dpo-dev"
     name: "dpo"
 
@@ -80,6 +80,7 @@ logger:
   tensorboard_enabled: true
   mlflow_enabled: false
   monitor_gpus: true
+  num_val_samples_to_print: 0  # Number of validation samples to pretty print on terminal
   wandb:
     project: nemo-rl
     name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1
 
@@ -80,6 +80,7 @@ logger:
   tensorboard_enabled: true
   mlflow_enabled: false
   monitor_gpus: true
+  num_val_samples_to_print: 0  # Number of validation samples to pretty print on terminal
   wandb:
     project: nemo-rl
     name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1
 
@@ -113,6 +113,7 @@ logger:
   tensorboard_enabled: true
   mlflow_enabled: false
   monitor_gpus: true
+  num_val_samples_to_print: 0  # Number of validation samples to pretty print on terminal
   wandb:
     project: nemo-rl
     name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1