feat: Supporting Model Specifications (#319)

xisen-w · web-flow · commit e1264719e10b · 2024-09-25T07:42:28.000+01:00
* Key changes

* Revised to support submission specifications

* Revised to support submission specifications

* revise CI

* CI-Fix

* fixing-CI
diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -8,6 +8,8 @@ kg_description_template:
       "Competition Type": "The type of competition, e.g., 'Classification', 'Regression', 'Clustering', 'Prediction", "Time-Series Forecasting",
       "Competition Description": "A brief description of the competition",
       "Target Description": "A description of the target variable to be predicted",
+      "Competition Features": "Two-line description of the overall features involved within the competition as background.",
+      "Submission Specifications": "The submission specification & sample submission csv descriptions for the model to output."
     }
     Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together.
 
@@ -20,7 +22,7 @@ kg_description_template:
 
 kg_background: |-
   You are solving a data science tasks and the type of the competition is {{ competition_type }}.
-  The competition description is:{{competition_description}}
+  The competition description is:{{competition_description}}.
   
   We provide an overall script in file: train.py. The user will run the train.py script along with several feature and model scripts to train several model to get a good performance on this task.
 
@@ -63,6 +65,8 @@ kg_background: |-
 
   For each loop, you need to help user decide which action item to choose and provide the corresponding code to implement the action item.
 
+  Most importantly, the output format & submission requirements are listed here: {{ submission_specifications }}
+
 kg_feature_interface: |-
   Your code should contain several parts:
   1. The import part: import the necessary libraries.
diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -34,6 +34,7 @@ def __init__(self, competition: str) -> None:
         self.competition_description = None
         self.target_description = None
         self.competition_features = None
+        self.submission_specifications = None
         self._analysis_competition_description()
         self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
 
@@ -66,13 +67,16 @@ def _analysis_competition_description(self):
         self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
         self.target_description = response_json_analysis.get("Target Description", "No target provided")
         self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
+        self.submission_specifications = response_json_analysis.get(
+            "Submission Specifications", "No submission requirements provided"
+        )
 
     def get_competition_full_desc(self) -> str:
         return f"""Competition Type: {self.competition_type}
 Competition Description: {self.competition_description}
 Target Description: {self.target_description}
 Competition Features: {self.competition_features}
 """
 
     @property
     def background(self) -> str:
@@ -91,6 +95,7 @@ def background(self) -> str:
                 competition_description=self.competition_description,
                 target_description=self.target_description,
                 competition_features=self.competition_features,
+                submission_specifications=self.submission_specifications,
             )
         )
         return background_prompt
@@ -165,4 +170,6 @@ def get_scenario_all_desc(self) -> str:
 {self._output_format}
 The simulator user can use to test your model:
 {self._simulator}
+The expected output & submission format specifications:
+{self.submission_specifications}
 """