Skip to content

Commit b80f7ad

Browse files
author
Harshitha Parnandi Venkata
committed
Added a few optimizations to re-use buffers, thereby improving performance.
1 parent b106ae0 commit b80f7ad

2 files changed

Lines changed: 64 additions & 13 deletions

File tree

docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ public static void Example()
2828
"images");
2929

3030
//Download the image set and unzip
31-
string finalImagesFolderName = DownloadImageSet(
32-
imagesDownloadFolderPath);
33-
31+
//string finalImagesFolderName = DownloadImageSet(
32+
// imagesDownloadFolderPath);
33+
string finalImagesFolderName = "flower_photos";
3434
string fullImagesetFolderPath = Path.Combine(
3535
imagesDownloadFolderPath, finalImagesFolderName);
3636

@@ -170,12 +170,49 @@ private static void EvaluateModel(MLContext mlContext,
170170
(elapsed2Ms / 1000).ToString() + " seconds");
171171
}
172172

173+
/// <summary>
/// Reads the whole file at <paramref name="path"/> into <paramref name="imgData"/>,
/// re-using the buffer's existing storage through <c>VBufferEditor</c>.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <param name="imgData">Buffer that receives the file contents; replaced by the committed editor result.</param>
/// <returns>The number of bytes stored in <paramref name="imgData"/>.</returns>
/// <exception cref="IOException">The file is larger than <see cref="int.MaxValue"/> bytes.</exception>
/// <exception cref="EndOfStreamException">The stream ended before the reported length was read.</exception>
public static int LoadDataIntoBuffer(string path, ref VBuffer<Byte> imgData)
{
    // bufferSize == 1 used to avoid unnecessary buffer in FileStream
    using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1))
    {
        long fileLength = fs.Length;
        if (fileLength > int.MaxValue)
            throw new IOException($"File {path} too big to open.");

        if (fileLength == 0)
        {
            // Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
            // Thus we need to assume 0 doesn't mean empty: read the file without relying on the
            // reported length and copy whatever was found into the reusable buffer.
            byte[] contents = File.ReadAllBytes(path);
            var fallbackEditor = VBufferEditor.Create(ref imgData, contents.Length);
            contents.AsSpan().CopyTo(fallbackEditor.Values);
            imgData = fallbackEditor.Commit();
            return contents.Length;
        }

        int count = (int)fileLength;
        var editor = VBufferEditor.Create(ref imgData, count);
        Span<byte> destination = editor.Values;

        // A single Read call may return fewer bytes than requested, so keep reading
        // until the buffer is full.
        int totalRead = 0;
        while (totalRead < count)
        {
            int bytesRead = fs.Read(destination.Slice(totalRead));
            if (bytesRead == 0)
                throw new EndOfStreamException($"Unexpected end of file while reading {path}.");
            totalRead += bytesRead;
        }

        imgData = editor.Commit();
        return count;
    }
}
209+
173210
public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
174211
bool useFolderNameasLabel = true)
175212
{
176213
var files = Directory.GetFiles(folder, "*",
177214
searchOption: SearchOption.AllDirectories);
178-
215+
VBuffer<Byte> imgData = new VBuffer<byte>();
179216
foreach (var file in files)
180217
{
181218
if (Path.GetExtension(file) != ".JPEG" && Path.GetExtension(file) != ".jpg")
@@ -197,8 +234,7 @@ public static IEnumerable<ImageData> LoadImagesFromDirectory(string folder,
197234
}
198235

199236
// Get the buffer of bytes
200-
byte[] imgBytes = File.ReadAllBytes(Path.Combine(folder, file));
201-
VBuffer<Byte> imgData = new VBuffer<byte>(imgBytes.Length, imgBytes);
237+
int imgSize = LoadDataIntoBuffer(file, ref imgData);
202238

203239
yield return new ImageData()
204240
{

src/Microsoft.ML.Dnn/ImageClassificationTransform.cs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,26 @@ private void CheckTrainingParameters(ImageClassificationEstimator.Options option
189189
return (jpegData, resizedImage);
190190
}
191191

192+
193+
/// <summary>
/// Creates a TF_STRING tensor holding the first <paramref name="length"/> bytes of
/// <paramref name="buffer"/>, encoding them directly from the buffer's values.
/// </summary>
/// <param name="buffer">Buffer whose values are encoded.</param>
/// <param name="length">Number of bytes to encode; must not exceed the number of values in <paramref name="buffer"/>.</param>
/// <returns>A new <c>Tensor</c> wrapping the allocated native tensor handle.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative or larger than the number of values in <paramref name="buffer"/>.
/// </exception>
private static Tensor Encode(VBuffer<byte> buffer, int length)
{
    var values = buffer.GetValues();

    // The native encoder reads exactly `length` bytes from the pinned span, so a larger
    // value would read past the end of the managed memory.
    if (length < 0 || length > values.Length)
        throw new ArgumentOutOfRangeException(nameof(length), length, "Length must be between 0 and the number of values in the buffer.");

    var size = c_api.TF_StringEncodedSize((UIntPtr)length);

    // Allocate room for the encoded string plus 8 leading bytes (one Int64).
    var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr)((ulong)size + 8));

    // Write a zero Int64 at the start of the tensor data; the encoded bytes follow it.
    // NOTE(review): presumably this is the offset entry for the single string element - confirm
    // against the TensorFlow string tensor layout.
    IntPtr tensor = c_api.TF_TensorData(handle);
    Marshal.WriteInt64(tensor, 0);

    using (var status = new Status())
    {
        unsafe
        {
            fixed (byte* src = values)
                c_api.TF_StringEncode(src, (UIntPtr)length, (sbyte*)(tensor + sizeof(Int64)), size, status);
        }

        status.Check(true);
    }

    return new Tensor(handle);
}
211+
192212
private sealed class ImageProcessor
193213
{
194214
private Runner _imagePreprocessingRunner;
@@ -202,7 +222,7 @@ public ImageProcessor(ImageClassificationTransformer transformer)
202222

203223
public Tensor ProcessImage(VBuffer<byte> imgBuf)
204224
{
205-
var imageTensor = new Tensor(imgBuf.DenseValues().ToArray(), TF_DataType.TF_STRING);
225+
var imageTensor = Encode(imgBuf, imgBuf.Length);
206226
var processedTensor = _imagePreprocessingRunner.AddInput(imageTensor, 0).Run()[0];
207227
imageTensor.Dispose();
208228
return processedTensor;
@@ -221,16 +241,14 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
221241
labelColumn.Type.RawType.ToString());
222242

223243
var imageBufColumn = input.Schema[imageColumnName];
224-
var imagePathColumn = input.Schema["ImagePath"];
225244
Runner runner = new Runner(_session);
226245
runner.AddOutputs(outputTensorName);
227246

228247
using (TextWriter writer = File.CreateText(cacheFilePath))
229-
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageBufColumn.Index || c.Index == imagePathColumn.Index)))
248+
using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageBufColumn.Index)))
230249
{
231250
var labelGetter = cursor.GetGetter<uint>(labelColumn);
232251
var imageBufGetter = cursor.GetGetter<VBuffer<byte>>(imageBufColumn);
233-
var imagePathGetter = cursor.GetGetter<ReadOnlyMemory<char>>(imagePathColumn);
234252
UInt32 label = UInt32.MaxValue;
235253
VBuffer<byte> imageBuf = default;
236254
ReadOnlyMemory<char> imagePath = default;
@@ -242,16 +260,13 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName
242260
{
243261
labelGetter(ref label);
244262
imageBufGetter(ref imageBuf);
245-
imagePathGetter(ref imagePath);
246-
var imagePathStr = imagePath.ToString();
247263
var imageTensor = imageProcessor.ProcessImage(imageBuf);
248264
runner.AddInput(imageTensor, 0);
249265
var featurizedImage = runner.Run()[0]; // Reuse memory?
250266
writer.WriteLine(label - 1 + "," + string.Join(",", featurizedImage.ToArray<float>()));
251267
featurizedImage.Dispose();
252268
imageTensor.Dispose();
253269
metrics.Bottleneck.Index++;
254-
metrics.Bottleneck.Name = imagePathStr;
255270
metricsCallback?.Invoke(metrics);
256271
}
257272
}

0 commit comments

Comments
 (0)