The data was created using this preprocessing script.
python train_cf_classifier.py --data_path data/CR_manual_annotations.csv --target_column is_cr --model_name_or_path microsoft/deberta-v3-xsmall --output_dir models/cr_classifier
python annotate_cf.py --model models/cr_classifier/checkpoint-395 --target_column is_cr
python create_feedback_contingency_results_plot.py
python train_lm.py fit --trainer.devices [0] --trainer.accelerator gpu --trainer.logger=WandbLogger --trainer.logger.name baseline
python train_ppo_reward_model.py --model_name_or_path microsoft/deberta-v3-xsmall --output_dir reward_modeling_test
python train_ppo_reward_model.py --model_name_or_path microsoft/deberta-v3-xsmall --output_dir reward_model_topline --data_paths ~/data/babylm_data/evaluation_data/blimp_filtered_childes/ ~/data/babylm_data/evaluation_data/zorro_filtered_childes/
python train_ppo.py --policy_model lightning_logs/kqb5kj4z/ckpt_huggingface_best --value_model reward_model/checkpoint-900
python create_results_visualizations.py
To use the precomputed baseline statistics as a reference:
python create_results_visualizations.py --results_file results/results_baselines.csv
python create_results_visualizations.py --plot_comparison_model_2 Topline