@@ -43,7 +43,7 @@ def crawl_descriptions(competition: str, wait: float = 3.0, force: bool = False)
4343
4444 # Get main contents
4545 contents = []
46- elements = site_body .find_elements (By .CSS_SELECTOR , ".sc-iWlrxG.cMAZdc " )
46+ elements = site_body .find_elements (By .CSS_SELECTOR , ".fbHzUd " )
4747 for e in elements :
4848 content = e .get_attribute ("innerHTML" )
4949 contents .append (content )
@@ -53,14 +53,14 @@ def crawl_descriptions(competition: str, wait: float = 3.0, force: bool = False)
5353 descriptions [subtitles [i ]] = contents [i ]
5454
5555 # Get the citation
56- element = site_body .find_element (By .CSS_SELECTOR , ".sc-ifyrTC.sc-fyziuY " )
56+ element = site_body .find_element (By .CSS_SELECTOR , ".bZEXEC " )
5757 citation = element .get_attribute ("innerHTML" )
5858 descriptions [subtitles [- 1 ]] = citation
5959
6060 data_url = f"https://www.kaggle.com/competitions/{ competition } /data"
6161 driver .get (data_url )
6262 time .sleep (wait )
63- data_element = driver .find_element (By .CSS_SELECTOR , ".sc-iWlrxG.cMAZdc " )
63+ data_element = driver .find_element (By .CSS_SELECTOR , ".fbHzUd " )
6464 descriptions ["Data Description" ] = data_element .get_attribute ("innerHTML" )
6565
6666 driver .quit ()
@@ -80,7 +80,57 @@ def download_data(competition: str, local_path: str = "/data/userdata/share/kagg
8080
8181
8282if __name__ == "__main__" :
83- download_data ("feedback-prize-english-language-learning" , "/data/userdata/share/kaggle" )
83+ dsagent_cs = [
84+ "feedback-prize-english-language-learning" ,
85+ "playground-series-s3e11" ,
86+ "playground-series-s3e14" ,
87+ "spaceship-titanic" ,
88+ "playground-series-s3e18" ,
89+ "playground-series-s3e16" ,
90+ "playground-series-s3e9" ,
91+ "playground-series-s3e25" ,
92+ "playground-series-s3e26" ,
93+ "playground-series-s3e24" ,
94+ "playground-series-s3e23" ,
95+ ]
96+
97+ other_cs = [
98+ "amp-parkinsons-disease-progression-prediction" ,
99+ "arc-prize-2024" ,
100+ "ariel-data-challenge-2024" ,
101+ "child-mind-institute-detect-sleep-states" ,
102+ "connectx" ,
103+ "contradictory-my-dear-watson" ,
104+ "digit-recognizer" ,
105+ "fathomnet-out-of-sample-detection" ,
106+ "forest-cover-type-prediction" ,
107+ "gan-getting-started" ,
108+ "google-research-identify-contrails-reduce-global-warming" ,
109+ "house-prices-advanced-regression-techniques" ,
110+ "isic-2024-challenge" ,
111+ "leash-BELKA" ,
112+ "llm-20-questions" ,
113+ "nlp-getting-started" ,
114+ "playground-series-s4e1" ,
115+ "playground-series-s4e2" ,
116+ "playground-series-s4e3" ,
117+ "playground-series-s4e4" ,
118+ "playground-series-s4e5" ,
119+ "playground-series-s4e6" ,
120+ "playground-series-s4e7" ,
121+ "playground-series-s4e8" ,
122+ "rsna-2024-lumbar-spine-degenerative-classification" ,
123+ "sf-crime" ,
124+ "store-sales-time-series-forecasting" ,
125+ "titanic" ,
126+ "tpu-getting-started" ,
127+ "covid19-global-forecasting-week-1" ,
128+ "birdsong-recognition" ,
129+ "optiver-trading-at-the-close" ,
130+ ]
131+
132+ for i in dsagent_cs + other_cs :
133+ crawl_descriptions (i )
84134 exit ()
85135 from kaggle .api .kaggle_api_extended import KaggleApi
86136
0 commit comments