11import io
22import os
3+ from collections import OrderedDict
34
45from .. import config
56from .. import datasets
@@ -22,7 +23,6 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
2223 self .estimation_procedure ["type" ] = estimation_procedure_type
2324 self .estimation_procedure ["data_splits_url" ] = data_splits_url
2425 self .estimation_procedure ["parameters" ] = estimation_parameters
25- #
2626 self .estimation_parameters = estimation_parameters
2727 self .evaluation_measure = evaluation_measure
2828 self .cost_matrix = cost_matrix
@@ -116,6 +116,92 @@ def remove_tag(self, tag):
116116 openml ._api_calls ._perform_api_call ("/task/untag" , data = data )
117117
118118
def _to_dict(self):
    """Build an ordered dictionary representation of this task.

    The returned container holds a single ``'oml:task'`` entry whose value
    carries the ``xmlns:oml`` namespace attribute followed by the task's
    fields. ``OrderedDict`` is used throughout so that keys keep their
    insertion order when the dictionary is serialized to the upload xml.

    NOTE(review): the parameter, component, tag and binary sections below
    mirror flow serialization. They read ``self.parameters``,
    ``self.parameters_meta_info``, ``self.components``, ``self.tags`` and
    ``self.binary_url`` / ``self.binary_format`` / ``self.binary_md5``.
    Confirm that the task class provides these attributes and that the
    task xsd expects these elements in this order.

    Returns
    -------
    OrderedDict
        ``{'oml:task': OrderedDict(...)}`` describing this task.

    Raises
    ------
    ValueError
        If ``task_type_id`` is None, or if a parameter/component key or a
        parameter value is neither None nor a string.
    """

    def _require_string(kind, obj):
        # ``kind`` is 'name' or 'value'; None is accepted and passed through.
        if obj is not None and not isinstance(obj, six.string_types):
            raise ValueError('Parameter %s %s cannot be serialized '
                             'because it is of type %s. Only strings '
                             'can be serialized.' % (kind, obj, type(obj)))

    task_container = OrderedDict()
    task_dict = OrderedDict([('@xmlns:oml', 'http://openml.org/openml')])
    task_container['oml:task'] = task_dict
    _add_if_nonempty(task_dict, 'oml:task_id', self.task_id)

    if self.task_type_id is None:
        raise ValueError("task_type_id is required but None")
    # Every other element is written with the 'oml:' namespace prefix, so
    # the task type id uses it as well.
    task_dict['oml:task_type_id'] = self.task_type_id

    for attribute in ["source_data", "target_name"]:
        _add_if_nonempty(task_dict, 'oml:{}'.format(attribute),
                         getattr(self, attribute))

    parameters = []
    for key in self.parameters:
        meta_info = self.parameters_meta_info[key]

        param_dict = OrderedDict()
        param_dict['oml:name'] = key
        _add_if_nonempty(param_dict, 'oml:data_type',
                         meta_info['data_type'])
        # The default value is always written, even when it is None.
        param_dict['oml:default_value'] = self.parameters[key]
        _add_if_nonempty(param_dict, 'oml:description',
                         meta_info['description'])

        for key_, value in param_dict.items():
            _require_string('name', key_)
            _require_string('value', value)

        parameters.append(param_dict)

    task_dict['oml:parameter'] = parameters

    components = []
    for key in self.components:
        component_dict = OrderedDict()
        component_dict['oml:identifier'] = key
        # NOTE(review): this expects each component's _to_dict() to return
        # a container with an 'oml:flow' entry - confirm for tasks.
        component_dict['oml:flow'] = \
            self.components[key]._to_dict()['oml:flow']

        # Only the keys are checked here; the nested value has already
        # been validated by the recursive call that produced it.
        for key_ in component_dict:
            _require_string('name', key_)

        components.append(component_dict)

    task_dict['oml:component'] = components
    task_dict['oml:tag'] = self.tags
    for attribute in ["binary_url", "binary_format", "binary_md5"]:
        _add_if_nonempty(task_dict, 'oml:{}'.format(attribute),
                         getattr(self, attribute))

    return task_container
203+
204+
119205def _create_task_cache_dir (task_id ):
120206 task_cache_dir = os .path .join (config .get_cache_directory (), "tasks" , str (task_id ))
121207
@@ -125,3 +211,8 @@ def _create_task_cache_dir(task_id):
125211 # TODO add debug information!
126212 pass
127213 return task_cache_dir
214+
215+
def _add_if_nonempty(dic, key, value):
    """Store ``value`` under ``key`` in ``dic`` unless ``value`` is None.

    Only ``None`` is skipped; other falsy values such as ``''``, ``0`` or
    an empty list are still written. ``dic`` is modified in place and
    nothing is returned.
    """
    if value is None:
        return
    dic[key] = value
0 commit comments