33// See the LICENSE file in the project root for more information.
44
55using System ;
6- using System . Buffers ;
76using System . Collections . Generic ;
87using System . Drawing ;
98using System . IO ;
109using System . Linq ;
10+ using System . Runtime . InteropServices ;
11+ using System . Security . Cryptography ;
1112using System . Text ;
1213using Microsoft . ML ;
1314using Microsoft . ML . CommandLine ;
@@ -71,10 +72,10 @@ internal sealed class Options : TransformInputBase
7172 /// </summary>
7273 public readonly string ImageFolder ;
7374 /// <summary>
74- /// The DataViewType for the image. It can be a VectorDataView of bytes or ImageDataView type.
75- /// If no options are specified, it defaults to ImageDataView type.
75+ /// Flag selecting the DataViewType for the image. If true, the image is a VectorDataView of bytes; otherwise it is an ImageDataView type.
76+ /// If no options are specified, it defaults to false (ImageDataView type).
7677 /// </summary>
77- public readonly DataViewType Type ;
78+ public readonly bool Type ;
7879
7980 /// <summary>
8081 /// The columns passed to this <see cref="ITransformer"/>.
@@ -91,7 +92,7 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
9192 : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ImageLoadingTransformer ) ) , columns )
9293 {
9394 ImageFolder = imageFolder ;
94- Type = new ImageDataViewType ( ) ;
95+ Type = false ;
9596 }
9697
9798 /// <summary>
@@ -101,14 +102,11 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null
101102 /// <param name="imageFolder">Folder where to look for images.</param>
102103 /// <param name="type">Image type flag: true loads images as a VectorDataViewType of bytes, false loads them as ImageDataViewType. Defaults to false (ImageDataViewType) if not specified.</param>
103104 /// <param name="columns">Names of input and output columns.</param>
internal ImageLoadingTransformer(
    IHostEnvironment env,
    string imageFolder = null,
    bool type = false,
    params (string outputColumnName, string inputColumnName)[] columns)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingTransformer)), columns)
{
    // The flag is stored as-is; no normalization is needed for a bool.
    Type = type;

    // Folder where to look for images; stored without validation here.
    ImageFolder = imageFolder;
}
113111
114112 // Factory method for SignatureDataTransform.
@@ -125,7 +123,7 @@ private static ImageLoadingTransformer Create(IHostEnvironment env, ModelLoadCon
125123 env . CheckValue ( ctx , nameof ( ctx ) ) ;
126124
127125 ctx . CheckAtModel ( GetVersionInfo ( ) ) ;
128- return new ImageLoadingTransformer ( env . Register ( nameof ( ImageLoadingTransformer ) ) , ctx ) ;
126+ return new ImageLoadingTransformer ( env . Register ( nameof ( ImageLoadingTransformer ) ) , ctx ) ;
129127 }
130128
131129 private ImageLoadingTransformer ( IHost host , ModelLoadContext ctx )
@@ -137,13 +135,13 @@ private ImageLoadingTransformer(IHost host, ModelLoadContext ctx)
137135
138136 ImageFolder = ctx . LoadStringOrNull ( ) ;
139137
140- if ( new VectorDataViewType ( NumberDataViewType . Byte ) . ToString ( ) . Equals ( ctx . LoadStringOrNull ( ) ) )
138+ if ( ctx . LoadStringOrNull ( ) . Equals ( "True" ) )
141139 {
142- Type = new VectorDataViewType ( NumberDataViewType . Byte ) ;
140+ Type = true ; // It is a VBuffer<byte> type
143141 }
144142 else
145143 {
146- Type = new ImageDataViewType ( ) ;
144+ Type = false ; // It is a ImageDataViewType
147145 }
148146
149147 }
@@ -195,9 +193,9 @@ private static VersionInfo GetVersionInfo()
195193 private sealed class Mapper : OneToOneMapperBase
196194 {
197195 private readonly ImageLoadingTransformer _parent ;
198- private readonly DataViewType _type ;
196+ private readonly bool _type ;
199197
200- public Mapper ( ImageLoadingTransformer parent , DataViewSchema inputSchema , DataViewType type )
198+ public Mapper ( ImageLoadingTransformer parent , DataViewSchema inputSchema , bool type )
201199 : base ( parent . Host . Register ( nameof ( Mapper ) ) , parent , inputSchema )
202200 {
203201 _type = type ;
@@ -207,18 +205,18 @@ public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, DataVi
/// <summary>
/// Creates the getter delegate for output column <paramref name="iinfo"/>, choosing the
/// implementation from the mapper's <c>_type</c> flag.
/// </summary>
protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func<int, bool> activeOutput, out Action disposer)
{
    // Assigned up front; the chosen helper receives the same out parameter.
    disposer = null;

    // true  -> load images as VBuffer<bytes>
    // false -> load images as ImageDataViewType
    return _type
        ? MakeGetterVectorDataViewByteType(input, iinfo, activeOutput, out disposer)
        : MakeGetterImageDataViewType(input, iinfo, activeOutput, out disposer);
}
220218
221- private Delegate MakeGetterType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , ImageDataViewType type , out Action disposer )
219+ private Delegate MakeGetterImageDataViewType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , out Action disposer )
222220 {
223221 Contracts . AssertValue ( input ) ;
224222 Contracts . Assert ( 0 <= iinfo && iinfo < _parent . ColumnPairs . Length ) ;
@@ -253,7 +251,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
253251 return del ;
254252 }
255253
256- private Delegate MakeGetterType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , VectorDataViewType type , out Action disposer )
254+ private Delegate MakeGetterVectorDataViewByteType ( DataViewRow input , int iinfo , Func < int , bool > activeOutput , out Action disposer )
257255 {
258256 Contracts . AssertValue ( input ) ;
259257 Contracts . Assert ( 0 <= iinfo && iinfo < _parent . ColumnPairs . Length ) ;
@@ -283,6 +281,7 @@ private Delegate MakeGetterType(DataViewRow input, int iinfo, Func<int, bool> ac
283281 public static int LoadDataIntoBuffer ( string path , ref VBuffer < byte > imgData )
284282 {
285283 int count = - 1 ;
284+ int bytesread = - 1 ;
286285 // bufferSize == 1 used to avoid unnecessary buffer in FileStream
287286 using ( FileStream fs = new FileStream ( path , FileMode . Open , FileAccess . Read , FileShare . Read , bufferSize : 1 ) )
288287 {
@@ -291,34 +290,68 @@ public static int LoadDataIntoBuffer(string path, ref VBuffer<byte> imgData)
291290 throw new IOException ( $ "File { path } too big to open.") ;
292291 else if ( fileLength == 0 )
293292 {
293+ Console . WriteLine ( "File length is zero even though the image is not empty" ) ;
294294 byte [ ] imageBuffer ;
295295 // Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
296296 // Thus we need to assume 0 doesn't mean empty.
297297 imageBuffer = File . ReadAllBytes ( path ) ;
298298 count = imageBuffer . Length ;
299- Console . WriteLine ( "File length is zero" ) ;
299+ imgData = new VBuffer < byte > ( count , imageBuffer ) ;
300+ return count ;
300301 }
301302
302303 count = ( int ) fileLength ;
304+ var editor = VBufferEditor . Create ( ref imgData , count ) ;
303305
304306#if NETSTANDARD2_0
305- byte [ ] buffer = null ;
306- buffer = File . ReadAllBytes ( path ) ;
307- imgData = new VBuffer < byte > ( buffer . Length , buffer ) ;
307+ bytesread = ReadToEnd ( fs , editor . Values ) ;
308+ Contracts . Assert ( count == bytesread ) ;
308309
309310#else
310- var editor = VBufferEditor . Create ( ref imgData , count ) ;
311311 fs . Read ( editor . Values ) ;
312- imgData = editor . Commit ( ) ;
312+ bytesread = editor . Values . Length ;
313+ Contracts . Assert ( count == bytesread ) ;
313314#endif
315+ imgData = editor . Commit ( ) ;
314316 return count ;
315317
316318 }
317319
318320 }
319321
/// <summary>
/// Reads <paramref name="stream"/> from its current position until it reports end of stream,
/// copying every byte read into <paramref name="bufferspan"/> starting at index 0.
/// </summary>
/// <param name="stream">The stream to drain.</param>
/// <param name="bufferspan">Destination span; it must be able to hold every byte the stream yields.</param>
/// <returns>The total number of bytes copied into <paramref name="bufferspan"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The stream yields more bytes than <paramref name="bufferspan"/> can hold.
/// </exception>
public static int ReadToEnd(System.IO.Stream stream, Span<byte> bufferspan)
{
    // The stream is read in fixed-size chunks through an intermediate array because this
    // method only relies on the array-based Stream.Read overload.
    const int chunkSize = 4096;
    byte[] readBuffer = new byte[chunkSize];

    int totalBytesRead = 0;
    int bytesRead;
    while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0)
    {
        // Fail explicitly (same exception type the pointer-based copy raised) instead of
        // relying on the copy primitive to detect that the destination is too small.
        if (bytesRead > bufferspan.Length - totalBytesRead)
        {
            throw new ArgumentOutOfRangeException(
                nameof(bufferspan),
                "The destination span is too small to hold the contents of the stream.");
        }

        // Safe, bounds-checked copy; no pinning or unsafe context required.
        readBuffer.AsSpan(0, bytesRead).CopyTo(bufferspan.Slice(totalBytesRead));
        totalBytesRead += bytesRead;
    }

    return totalBytesRead;
}
344+
/// <summary>
/// Translates the mapper's boolean <c>_type</c> flag into a concrete <see cref="DataViewType"/>.
/// </summary>
/// <returns>
/// A new <see cref="VectorDataViewType"/> over <see cref="NumberDataViewType.Byte"/> when the flag
/// is set; otherwise a new <see cref="ImageDataViewType"/>.
/// </returns>
public DataViewType GetDataViewType()
{
    if (!_type)
    {
        return new ImageDataViewType();
    }

    return new VectorDataViewType(NumberDataViewType.Byte);
}
352+
/// <summary>
/// Produces one detached output column per column pair of the parent transformer, each typed
/// by <see cref="GetDataViewType"/> and carrying no annotations.
/// </summary>
protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
    var pairs = _parent.ColumnPairs;
    var outputColumns = new DataViewSchema.DetachedColumn[pairs.Length];

    for (int i = 0; i < pairs.Length; i++)
    {
        // A fresh type instance is requested for every column, matching the per-element projection.
        outputColumns[i] = new DataViewSchema.DetachedColumn(pairs[i].outputColumnName, GetDataViewType(), null);
    }

    return outputColumns;
}
322355 }
323356 }
324357
@@ -371,18 +404,18 @@ internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, params
371404 /// <param name="imageFolder">Folder where to look for images.</param>
372405 /// <param name="type">Image type flag: true selects a VectorDataViewType of bytes, false selects ImageDataViewType. Defaults to false (ImageDataViewType) if not specified.</param>
373406 /// <param name="columns">Names of input and output columns.</param>
internal ImageLoadingEstimator(
    IHostEnvironment env,
    string imageFolder,
    bool type = false,
    params (string outputColumnName, string inputColumnName)[] columns)
    : this(env, new ImageLoadingTransformer(env, imageFolder, type, columns), type)
{
    // Intentionally empty: the transformer is built inline and all remaining setup is
    // delegated to the (env, transformer, type) constructor.
}
378411
/// <summary>
/// Wraps an existing <see cref="ImageLoadingTransformer"/> and records the output image type
/// selected by <paramref name="type"/>.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="transformer">The transformer passed to the base estimator.</param>
/// <param name="type">true selects a byte vector type; false (the default) selects <see cref="ImageDataViewType"/>.</param>
internal ImageLoadingEstimator(IHostEnvironment env, ImageLoadingTransformer transformer, bool type = false)
    : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingEstimator)), transformer)
{
    if (type)
    {
        _imageType = new VectorDataViewType(NumberDataViewType.Byte);
    }
    else
    {
        _imageType = new ImageDataViewType();
    }
}
387420
388421 /// <summary>
@@ -400,7 +433,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
400433 if ( ! ( col . ItemType is TextDataViewType ) || col . Kind != SchemaShape . Column . VectorKind . Scalar )
401434 throw Host . ExceptSchemaMismatch ( nameof ( inputSchema ) , "input" , inputColumnName , TextDataViewType . Instance . ToString ( ) , col . GetTypeString ( ) ) ;
402435
403- result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Scalar , _imageType , false ) ;
436+ if ( _imageType is ImageDataViewType )
437+ result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Scalar , _imageType , false ) ;
438+ else
439+ result [ outputColumnName ] = new SchemaShape . Column ( outputColumnName , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Byte , false ) ;
404440 }
405441
406442 return new SchemaShape ( result . Values ) ;
0 commit comments