Skip to content

Commit 22b1e62

Browse files
committed
Tweak a function copied from flow so that it can be used to create a task dict as a pre-step for publishing
1 parent 09ff907 commit 22b1e62

File tree

1 file changed

+92
-1
lines changed

1 file changed

+92
-1
lines changed

openml/tasks/task.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import io
22
import os
3+
from collections import OrderedDict
34

45
from .. import config
56
from .. import datasets
@@ -22,7 +23,6 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
2223
self.estimation_procedure["type"] = estimation_procedure_type
2324
self.estimation_procedure["data_splits_url"] = data_splits_url
2425
self.estimation_procedure["parameters"] = estimation_parameters
25-
#
2626
self.estimation_parameters = estimation_parameters
2727
self.evaluation_measure = evaluation_measure
2828
self.cost_matrix = cost_matrix
@@ -116,6 +116,92 @@ def remove_tag(self, tag):
116116
openml._api_calls._perform_api_call("/task/untag", data=data)
117117

118118

119+
def _to_dict(self):
    """Create an ordered dictionary representation of this task.

    The result is nested the way an XML serializer expects it: a
    container with the single key ``'oml:task'`` whose value holds the
    task elements. ``OrderedDict`` is used so that the elements keep the
    order in which they are inserted here.

    NOTE(review): this method was adapted from the flow serializer and
    still reads flow-style attributes (``parameters``,
    ``parameters_meta_info``, ``components``, ``tags``, ``binary_url``,
    ``binary_format``, ``binary_md5``) as well as ``source_data`` and
    ``target_name``. Confirm that the task class defines all of them and
    that the element order matches the task xsd schema.

    Returns
    -------
    OrderedDict
        Container of the form ``{'oml:task': OrderedDict(...)}``.

    Raises
    ------
    ValueError
        If ``task_type_id`` is None, or if a parameter/component entry
        has a key or value that is neither None nor a string.
    """
    task_container = OrderedDict()
    task_dict = OrderedDict([('@xmlns:oml', 'http://openml.org/openml')])
    task_container['oml:task'] = task_dict
    _add_if_nonempty(task_dict, 'oml:task_id', self.task_id)

    if self.task_type_id is None:
        raise ValueError("task_type_id is required but None")
    # Use the 'oml:' prefix like every other element of this document.
    task_dict['oml:task_type_id'] = self.task_type_id

    for attribute in ["source_data", "target_name"]:
        _add_if_nonempty(task_dict, 'oml:{}'.format(attribute),
                         getattr(self, attribute))

    # NOTE(review): ``six`` must be imported at module level for the type
    # checks below; the import is not visible in this part of the file.
    task_parameters = []
    for key in self.parameters:
        param_dict = OrderedDict()
        param_dict['oml:name'] = key
        meta_info = self.parameters_meta_info[key]

        _add_if_nonempty(param_dict, 'oml:data_type',
                         meta_info['data_type'])
        param_dict['oml:default_value'] = self.parameters[key]
        _add_if_nonempty(param_dict, 'oml:description',
                         meta_info['description'])

        # Only strings (or None) are accepted for serialization.
        for key_, value in param_dict.items():
            if key_ is not None and not isinstance(key_, six.string_types):
                raise ValueError('Parameter name %s cannot be serialized '
                                 'because it is of type %s. Only strings '
                                 'can be serialized.' % (key_, type(key_)))
            if value is not None and not isinstance(value, six.string_types):
                raise ValueError('Parameter value %s cannot be serialized '
                                 'because it is of type %s. Only strings '
                                 'can be serialized.' % (value, type(value)))

        task_parameters.append(param_dict)

    # Previously assigned to the undefined name ``flow_dict``.
    task_dict['oml:parameter'] = task_parameters

    components = []
    for key in self.components:
        component_dict = OrderedDict()
        component_dict['oml:identifier'] = key
        # NOTE(review): assumes each component's _to_dict() returns a
        # container keyed by 'oml:flow' (i.e. components are flows).
        component_dict['oml:flow'] = \
            self.components[key]._to_dict()['oml:flow']

        for key_ in component_dict:
            # Only the key is checked here; the value is the nested
            # dictionary produced by the component's own _to_dict().
            if key_ is not None and not isinstance(key_, six.string_types):
                raise ValueError('Parameter name %s cannot be serialized '
                                 'because it is of type %s. Only strings '
                                 'can be serialized.' % (key_, type(key_)))

        components.append(component_dict)

    task_dict['oml:component'] = components
    task_dict['oml:tag'] = self.tags
    for attribute in ["binary_url", "binary_format", "binary_md5"]:
        _add_if_nonempty(task_dict, 'oml:{}'.format(attribute),
                         getattr(self, attribute))

    # Previously returned the undefined name ``flow_container``.
    return task_container
203+
204+
119205
def _create_task_cache_dir(task_id):
120206
task_cache_dir = os.path.join(config.get_cache_directory(), "tasks", str(task_id))
121207

@@ -125,3 +211,8 @@ def _create_task_cache_dir(task_id):
125211
# TODO add debug information!
126212
pass
127213
return task_cache_dir
214+
215+
216+
def _add_if_nonempty(dic, key, value):
217+
if value is not None:
218+
dic[key] = value

0 commit comments

Comments
 (0)