Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.

Commit 10f3cd2

Browse files
committed
Expose DB backend option in web UI
Show correct filesize approximation for HDF5
1 parent 58f72ef commit 10f3cd2

File tree

8 files changed

+109
-18
lines changed

8 files changed

+109
-18
lines changed

digits/dataset/images/classification/forms.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,29 @@ class ImageClassificationDatasetForm(ImageDatasetForm):
1515
Defines the form used to create a new ImageClassificationDatasetJob
1616
"""
1717

18+
backend = wtforms.SelectField('DB backend',
19+
choices = [
20+
('lmdb', 'LMDB'),
21+
('hdf5', 'HDF5'),
22+
],
23+
default='lmdb',
24+
)
25+
26+
def validate_backend(form, field):
    """
    Inline validator for the `backend` field

    Resets the option that does not apply to the selected backend:
    'lmdb' forces `compression` to 'none', 'hdf5' forces `encoding`
    to 'none'. Any other value leaves the form untouched.
    """
    # Name of the sibling field that is meaningless for each backend
    unused_option = {'lmdb': 'compression', 'hdf5': 'encoding'}.get(field.data)
    if unused_option is not None:
        getattr(form, unused_option).data = 'none'
31+
32+
compression = utils.forms.SelectField('DB compression',
33+
choices = [
34+
('none', 'None'),
35+
('gzip', 'GZIP'),
36+
],
37+
default='none',
38+
tooltip='Compressing the dataset may significantly decrease the size of your database files, but it may increase read and write times.',
39+
)
40+
1841
# Use a SelectField instead of a HiddenField so that the default value
1942
# is used when nothing is provided (through the REST API)
2043
method = wtforms.SelectField(u'Dataset type',

digits/dataset/images/classification/views.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,21 @@ def from_folders(job, form):
8484

8585
### Add CreateDbTasks
8686

87+
backend = form.backend.data
8788
encoding = form.encoding.data
89+
compression = form.compression.data
8890

8991
job.tasks.append(
9092
tasks.CreateDbTask(
9193
job_dir = job.dir(),
9294
parents = parse_train_task,
9395
input_file = utils.constants.TRAIN_FILE,
9496
db_name = utils.constants.TRAIN_DB,
97+
backend = backend,
9598
image_dims = job.image_dims,
9699
resize_mode = job.resize_mode,
97100
encoding = encoding,
101+
compression = compression,
98102
mean_file = utils.constants.MEAN_FILE_CAFFE,
99103
labels_file = job.labels_file,
100104
)
@@ -107,9 +111,11 @@ def from_folders(job, form):
107111
parents = val_parents,
108112
input_file = utils.constants.VAL_FILE,
109113
db_name = utils.constants.VAL_DB,
114+
backend = backend,
110115
image_dims = job.image_dims,
111116
resize_mode = job.resize_mode,
112117
encoding = encoding,
118+
compression = compression,
113119
labels_file = job.labels_file,
114120
)
115121
)
@@ -121,9 +127,11 @@ def from_folders(job, form):
121127
parents = test_parents,
122128
input_file = utils.constants.TEST_FILE,
123129
db_name = utils.constants.TEST_DB,
130+
backend = backend,
124131
image_dims = job.image_dims,
125132
resize_mode = job.resize_mode,
126133
encoding = encoding,
134+
compression = compression,
127135
labels_file = job.labels_file,
128136
)
129137
)
@@ -141,8 +149,10 @@ def from_files(job, form):
141149
)
142150
job.labels_file = utils.constants.LABELS_FILE
143151

144-
encoding = form.encoding.data
145152
shuffle = bool(form.textfile_shuffle.data)
153+
backend = form.backend.data
154+
encoding = form.encoding.data
155+
compression = form.compression.data
146156

147157
### train
148158
if form.textfile_use_local_files.data:
@@ -162,10 +172,12 @@ def from_files(job, form):
162172
job_dir = job.dir(),
163173
input_file = train_file,
164174
db_name = utils.constants.TRAIN_DB,
175+
backend = backend,
165176
image_dims = job.image_dims,
166177
image_folder= image_folder,
167178
resize_mode = job.resize_mode,
168179
encoding = encoding,
180+
compression = compression,
169181
mean_file = utils.constants.MEAN_FILE_CAFFE,
170182
labels_file = job.labels_file,
171183
shuffle = shuffle,
@@ -192,10 +204,12 @@ def from_files(job, form):
192204
job_dir = job.dir(),
193205
input_file = val_file,
194206
db_name = utils.constants.VAL_DB,
207+
backend = backend,
195208
image_dims = job.image_dims,
196209
image_folder= image_folder,
197210
resize_mode = job.resize_mode,
198211
encoding = encoding,
212+
compression = compression,
199213
labels_file = job.labels_file,
200214
shuffle = shuffle,
201215
)
@@ -221,10 +235,12 @@ def from_files(job, form):
221235
job_dir = job.dir(),
222236
input_file = test_file,
223237
db_name = utils.constants.TEST_DB,
238+
backend = backend,
224239
image_dims = job.image_dims,
225240
image_folder= image_folder,
226241
resize_mode = job.resize_mode,
227242
encoding = encoding,
243+
compression = compression,
228244
labels_file = job.labels_file,
229245
shuffle = shuffle,
230246
)

digits/dataset/images/views.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ def image_dataset_resize_example():
2828
height = int(flask.request.form['height'])
2929
channels = int(flask.request.form['channels'])
3030
resize_mode = flask.request.form['resize_mode']
31+
backend = flask.request.form['backend']
3132
encoding = flask.request.form['encoding']
3233

3334
image = utils.image.resize_image(image, height, width,
3435
channels=channels,
3536
resize_mode=resize_mode,
3637
)
3738

38-
if encoding == 'none':
39+
if backend != 'lmdb' or encoding == 'none':
3940
length = len(image.tostring())
4041
else:
4142
s = StringIO()

digits/dataset/tasks/create_db.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,20 @@
1717
class CreateDbTask(Task):
1818
"""Creates a database"""
1919

20-
def __init__(self, input_file, db_name, image_dims, **kwargs):
20+
def __init__(self, input_file, db_name, backend, image_dims, **kwargs):
2121
"""
2222
Arguments:
2323
input_file -- read images and labels from this file
2424
db_name -- save database to this location
25+
backend -- database backend (lmdb/hdf5)
2526
image_dims -- (height, width, channels)
2627
2728
Keyword Arguments:
2829
image_folder -- prepend image paths with this folder
2930
shuffle -- shuffle images before saving
3031
resize_mode -- used in utils.image.resize_image()
3132
encoding -- 'none', 'png' or 'jpg'
33+
compression -- 'none' or 'gzip'
3234
mean_file -- save mean file to this location
3335
labels_file -- used to print category distribution
3436
"""
@@ -37,6 +39,7 @@ def __init__(self, input_file, db_name, image_dims, **kwargs):
3739
self.shuffle = kwargs.pop('shuffle', True)
3840
self.resize_mode = kwargs.pop('resize_mode' , None)
3941
self.encoding = kwargs.pop('encoding', None)
42+
self.compression = kwargs.pop('compression', None)
4043
self.mean_file = kwargs.pop('mean_file', None)
4144
self.labels_file = kwargs.pop('labels_file', None)
4245

@@ -45,6 +48,7 @@ def __init__(self, input_file, db_name, image_dims, **kwargs):
4548

4649
self.input_file = input_file
4750
self.db_name = db_name
51+
self.backend = backend
4852
self.image_dims = image_dims
4953
if image_dims[2] == 3:
5054
self.image_channel_order = 'BGR'
@@ -87,6 +91,11 @@ def __setstate__(self, state):
8791
self.encoding = 'none'
8892
self.pickver_task_createdb = PICKLE_VERSION
8993

94+
if not hasattr(self, 'backend'):
95+
self.backend = 'lmdb'
96+
if not hasattr(self, 'compression'):
97+
self.compression = 'none'
98+
9099
@override
91100
def name(self):
92101
if self.db_name == utils.constants.TRAIN_DB or 'train' in self.db_name.lower():
@@ -133,6 +142,7 @@ def task_arguments(self, resources):
133142
self.path(self.db_name),
134143
self.image_dims[1],
135144
self.image_dims[0],
145+
'--backend=%s' % self.backend,
136146
'--channels=%s' % self.image_dims[2],
137147
'--resize_mode=%s' % self.resize_mode,
138148
]
@@ -147,6 +157,8 @@ def task_arguments(self, resources):
147157
args.append('--shuffle')
148158
if self.encoding and self.encoding != 'none':
149159
args.append('--encoding=%s' % self.encoding)
160+
if self.compression and self.compression != 'none':
161+
args.append('--compression=%s' % self.compression)
150162

151163
return args
152164

digits/templates/datasets/images/classification/new.html

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,6 @@ <h1>New Image Classification Dataset</h1>
3333
{{ form.resize_channels(class='form-control') }}
3434
</div>
3535
</div>
36-
<div class="row">
37-
<div class="form-group{{ ' has-error' if form.encoding.errors else '' }}">
38-
<div class="form-group{{' has-error' if form.encoding.errors}}">
39-
{{form.encoding.label}}
40-
{{form.encoding.tooltip}}
41-
{{form.encoding(class='form-control')}}
42-
</div>
43-
</div>
44-
</div>
4536
<div class="row">
4637
<div class="form-group{{ ' has-error' if form.resize_width.errors or form.resize_height.errors else '' }}">
4738
<label>Image size</label>
@@ -82,6 +73,7 @@ <h1>New Image Classification Dataset</h1>
8273
"width": $("#resize_width").val(),
8374
"height": $("#resize_height").val(),
8475
"resize_mode": $("#resize_mode").val(),
76+
"backend": $("#backend").val(),
8577
"encoding": $("#encoding").val(),
8678
},
8779
function(response) {
@@ -369,6 +361,49 @@ <h1>New Image Classification Dataset</h1>
369361

370362
<div class="row">
371363
<div class="col-sm-6 col-sm-offset-3 well">
364+
<div class="form-group{{ ' has-error' if form.backend.errors else '' }}">
365+
{{ form.backend.label }}
366+
{{ form.backend.tooltip }}
367+
{{ form.backend(class='form-control') }}
368+
</div>
369+
<div id="backend-hdf5-warning" class="alert alert-warning" style="display:none;">
370+
<b>NOTE:</b> HDF5 is not fully supported by Caffe or by DIGITS
371+
<ul>
372+
<li>The standard networks will need some minor customizations before use (change <i>Data</i> layers to <i>HDF5Data</i> layers)</li>
373+
<li><i>HDF5Data</i> layers do not support mean subtraction</li>
374+
</ul>
375+
</div>
376+
{# Single wrapper: the script below toggles $("#compression").parent(), so the
   select's direct parent must be the whole form-group. #}
<div class="form-group{{ ' has-error' if form.compression.errors else '' }}">
    {{ form.compression.label }}
    {{ form.compression.tooltip }}
    {{ form.compression(class='form-control') }}
</div>
383+
{# Single wrapper: the script below toggles $("#encoding").parent(), so the
   select's direct parent must be the whole form-group. #}
<div class="form-group{{ ' has-error' if form.encoding.errors else '' }}">
    {{ form.encoding.label }}
    {{ form.encoding.tooltip }}
    {{ form.encoding(class='form-control') }}
</div>
390+
<script>
391+
// Show only the controls that apply to the selected DB backend:
// LMDB -> image encoding; HDF5 -> DB compression plus the HDF5 support warning.
function backendChanged()
{
    // Declared with `var` so the value stays local instead of becoming an
    // implicit global (which also throws under strict mode).
    var val = $("#backend").val();
    if (val === 'lmdb') {
        $("#compression").parent().hide();
        $("#encoding").parent().show();
        $("#backend-hdf5-warning").hide();
    } else if (val === 'hdf5') {
        $("#encoding").parent().hide();
        $("#compression").parent().show();
        $("#backend-hdf5-warning").show();
    }
}
404+
$("#backend").change(backendChanged);
405+
backendChanged();
406+
</script>
372407
<div class="form-group{{ ' has-error' if form.dataset_name.errors else '' }}">
373408
{{ form.dataset_name.label }}
374409
{{ form.dataset_name(class='form-control') }}

digits/templates/datasets/images/classification/show.html

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@ <h4>Job Information</h4>
1212
<dd>{{ job.dir() }}</dd>
1313
</dl>
1414
<dl>
15-
<dt>Image Type</dt>
16-
<dd>{{'Color' if job.image_dims[2] == 3 else 'Grayscale'}}</dd>
17-
<dt>Image Encoding</dt>
18-
<dd>{{job.train_db_task().encoding}}</dd>
1915
<dt>Image Dimensions</dt>
2016
<dd>{{job.image_dims[1]}}x{{job.image_dims[0]}}</dd>
17+
<dt>Image Type</dt>
18+
<dd>{{'Color' if job.image_dims[2] == 3 else 'Grayscale'}}</dd>
2119
<dt>Resize Transformation</dt>
2220
<dd>{{ job.resize_mode_name() }}</dd>
21+
<dt>DB Backend</dt>
22+
<dd>{{job.train_db_task().backend}}</dd>
23+
<dt>Image Encoding</dt>
24+
<dd>{{job.train_db_task().encoding}}</dd>
25+
<dt>DB Compression</dt>
26+
<dd>{{job.train_db_task().compression}}</dd>
2327
</dl>
2428
</div>
2529
{% endmacro %}

docs/API.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# REST API
22

3-
*Generated Sep 01, 2015*
3+
*Generated Sep 02, 2015*
44

55
DIGITS exposes its internal functionality through a REST API. You can access these endpoints by performing a GET or POST on the route, and a JSON object will be returned.
66

docs/FlaskRoutes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Flask Routes
22

3-
*Generated Sep 01, 2015*
3+
*Generated Sep 02, 2015*
44

55
Documentation on the various routes used internally for the web application.
66

0 commit comments

Comments
 (0)