11package schema
22
33import (
4+ "encoding/base64"
45 "fmt"
56 "reflect"
67 "sort"
@@ -13,20 +14,22 @@ import (
1314 "github.com/apache/arrow/go/v13/arrow/memory"
1415 "github.com/cloudquery/plugin-sdk/v3/types"
1516 "github.com/google/uuid"
17+ "golang.org/x/exp/rand"
1618 "golang.org/x/exp/slices"
1719)
1820
// TestSourceOptions controls which types are included by TestSourceColumns.
// The zero value skips nothing: every Skip* flag is opt-out.
type TestSourceOptions struct {
	SkipDates      bool
	SkipDecimals   bool // decimal types. When false, a Decimal128 column named "decimal" is added.
	SkipDurations  bool
	SkipIntervals  bool
	SkipLargeTypes bool // e.g. large binary, large string
	SkipLists      bool // lists of all primitive types. Lists that were supported by CQTypes are always included.
	SkipMaps       bool // map columns built from the basic columns.
	SkipStructs    bool
	SkipTimes      bool // time of day types
	SkipTimestamps bool // timestamp types. Microsecond timestamp is always included, regardless of this setting.
	TimePrecision  time.Duration
}
3235
@@ -57,6 +60,10 @@ func TestSourceColumns(testOpts TestSourceOptions) []Column {
5760 // we don't support float16 right now
5861 basicColumns = removeColumnsByType (basicColumns , arrow .FLOAT16 )
5962
63+ if ! testOpts .SkipDecimals {
64+ basicColumns = append (basicColumns , Column {Name : "decimal" , Type : & arrow.Decimal128Type {Precision : 19 , Scale : 10 }})
65+ }
66+
6067 if testOpts .SkipTimestamps {
6168 // for backwards-compatibility, microsecond timestamps are not removed here
6269 basicColumns = removeColumnsByDataType (basicColumns , & arrow.TimestampType {Unit : arrow .Second , TimeZone : "UTC" })
@@ -96,9 +103,9 @@ func TestSourceColumns(testOpts TestSourceOptions) []Column {
96103 compositeColumns = append (compositeColumns , listOfColumns (basicColumnsWithExclusions )... )
97104 }
98105
99- // if !opts .SkipMaps {
100- // compositeColumns = append(compositeColumns, mapOfColumns(basicColumnsWithExclusions)...)
101- // }
106+ if ! testOpts .SkipMaps {
107+ compositeColumns = append (compositeColumns , mapOfColumns (basicColumnsWithExclusions )... )
108+ }
102109
103110 // add JSON later, we don't want to include it as a list or map right now (it causes complications with JSON unmarshalling)
104111 basicColumns = append (basicColumns , Column {Name : "json" , Type : types .NewJSONType ()})
@@ -214,9 +221,12 @@ func listOfColumns(baseColumns []Column) []Column {
214221// mapOfColumns returns a list of columns that are maps of the given columns.
215222// nolint:unused
216223func mapOfColumns (baseColumns []Column ) []Column {
217- columns := make ([]Column , len (baseColumns ))
218- for i := 0 ; i < len (baseColumns ); i ++ {
219- columns [i ] = Column {Name : baseColumns [i ].Name + "_map" , Type : arrow .MapOf (baseColumns [i ].Type , baseColumns [i ].Type )}
224+ columns := make ([]Column , len (baseColumns )* 2 )
225+ for i := 0 ; i < len (columns ); i += 2 {
226+ // we focus on string and int keys for now
227+ n := i / 2
228+ columns [i ] = Column {Name : "int_" + baseColumns [n ].Name + "_map" , Type : arrow .MapOf (arrow .BinaryTypes .String , baseColumns [n ].Type )}
229+ columns [i + 1 ] = Column {Name : "string_" + baseColumns [n ].Name + "_map" , Type : arrow .MapOf (arrow .PrimitiveTypes .Int64 , baseColumns [n ].Type )}
220230 }
221231 return columns
222232}
@@ -260,6 +270,7 @@ type GenTestDataOptions struct {
260270 // StableTime is the time to use for all rows other than sync time. If set to time.Time{}, a new time will be generated
261271 StableTime time.Time
262272 TimePrecision time.Duration
273+ Seed int64
263274}
264275
265276// GenTestData generates a slice of arrow.Records with the given schema and options.
@@ -299,12 +310,18 @@ func GenTestData(table *Table, opts GenTestDataOptions) []arrow.Record {
299310}
300311
301312func getExampleJSON (colName string , dataType arrow.DataType , opts GenTestDataOptions ) string {
313+ src := rand .NewSource (uint64 (opts .Seed ))
314+ rnd := rand .New (src )
315+
302316 // handle lists (including maps)
303317 if arrow .IsListLike (dataType .ID ()) {
304318 if dataType .ID () == arrow .MAP {
305319 k := getExampleJSON (colName , dataType .(* arrow.MapType ).KeyType (), opts )
306320 v := getExampleJSON (colName , dataType .(* arrow.MapType ).ItemType (), opts )
307- return fmt .Sprintf (`[{"key": %s,"value": %s}]` , k , v )
321+ opts .Seed ++
322+ k2 := getExampleJSON (colName , dataType .(* arrow.MapType ).KeyType (), opts )
323+ v2 := getExampleJSON (colName , dataType .(* arrow.MapType ).ItemType (), opts )
324+ return fmt .Sprintf (`[{"key": %s,"value": %s},{"key": %s,"value": %s}]` , k , v , k2 , v2 )
308325 }
309326 inner := dataType .(* arrow.ListType ).Elem ()
310327 return `[` + getExampleJSON (colName , inner , opts ) + `,null,` + getExampleJSON (colName , inner , opts ) + `]`
@@ -332,26 +349,47 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
332349
333350 // handle signed integers
334351 if arrow .IsSignedInteger (dataType .ID ()) {
335- return "-1"
352+ switch dataType {
353+ case arrow .PrimitiveTypes .Int8 :
354+ return fmt .Sprintf ("-%d" , rnd .Intn (int (^ uint8 (0 )>> 1 )))
355+ case arrow .PrimitiveTypes .Int16 :
356+ return fmt .Sprintf ("-%d" , rnd .Intn (int (^ uint16 (0 )>> 1 )))
357+ case arrow .PrimitiveTypes .Int32 :
358+ return fmt .Sprintf ("-%d" , rnd .Intn (int (^ uint32 (0 )>> 1 )))
359+ case arrow .PrimitiveTypes .Int64 :
360+ return fmt .Sprintf ("-%d" , rnd .Int63n (int64 (^ uint64 (0 )>> 1 )))
361+ }
336362 }
337363
338364 // handle unsigned integers
339365 if arrow .IsUnsignedInteger (dataType .ID ()) {
340- return "1"
366+ switch dataType {
367+ case arrow .PrimitiveTypes .Uint8 :
368+ return fmt .Sprintf ("%d" , rnd .Uint64n (uint64 (^ uint8 (0 ))))
369+ case arrow .PrimitiveTypes .Uint16 :
370+ return fmt .Sprintf ("%d" , rnd .Uint64n (uint64 (^ uint16 (0 ))))
371+ case arrow .PrimitiveTypes .Uint32 :
372+ return fmt .Sprintf ("%d" , rnd .Uint64n (uint64 (^ uint32 (0 ))))
373+ case arrow .PrimitiveTypes .Uint64 :
374+ return fmt .Sprintf ("%d" , rnd .Uint64 ())
375+ }
341376 }
342377
343378 // handle floats
344379 if arrow .IsFloating (dataType .ID ()) {
345- return "1.1"
380+ return fmt . Sprintf ( "%d.%d" , rnd . Intn ( 1e3 ), rnd . Intn ( 1e3 ))
346381 }
347382
348383 // handle decimals
349384 if arrow .IsDecimal (dataType .ID ()) {
350- return "1.1"
385+ return fmt . Sprintf ( "%d.%d" , rnd . Int63n ( 1e9 ), rnd . Int63n ( 1e10 ))
351386 }
352387
353388 // handle booleans
354389 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .Boolean ) {
390+ if rnd .Intn (2 ) == 0 {
391+ return "false"
392+ }
355393 return "true"
356394 }
357395
@@ -365,7 +403,8 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
365403 if colName == CqSourceNameColumn .Name {
366404 return `"` + opts .SourceName + `"`
367405 }
368- return `"AString"`
406+ n := rnd .Intn (100000 )
407+ return fmt .Sprintf (`"AString%d"` , n )
369408 }
370409 }
371410
@@ -376,7 +415,9 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
376415 }
377416 for _ , binaryType := range binaryTypes {
378417 if arrow .TypeEqual (dataType , binaryType ) {
379- return `"AQIDBA=="` // base64 encoded 0x01, 0x02, 0x03, 0x04
418+ bytes := make ([]byte , 4 )
419+ rnd .Read (bytes )
420+ return `"` + base64 .StdEncoding .EncodeToString (bytes ) + `"`
380421 }
381422 }
382423
@@ -450,22 +491,24 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
450491
451492 // handle date types
452493 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .Date32 ) {
453- return ` 19471`
494+ return fmt . Sprintf ( "%d" , 19471 + rnd . Intn ( 100 ))
454495 }
455496 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .Date64 ) {
456- ms := 19471 * 86400000
497+ ms := ( 19471 + rnd . Intn ( 100 )) * 86400000
457498 return fmt .Sprintf ("%d" , ms )
458499 }
459500
460501 // handle duration and interval types
461502 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .DayTimeInterval ) {
462- return `{"days": 1, "milliseconds": 1}`
503+ n := rnd .Intn (10000 )
504+ return fmt .Sprintf (`{"days": %[1]d, "milliseconds": %[1]d}` , n )
463505 }
464506 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .MonthInterval ) {
465507 return `{"months": 1}`
466508 }
467509 if arrow .TypeEqual (dataType , arrow .FixedWidthTypes .MonthDayNanoInterval ) {
468- return `{"months": 1, "days": 1, "nanoseconds": 1}`
510+ n := rnd .Intn (10000 )
511+ return fmt .Sprintf (`{"months": %[1]d, "days": %[1]d, "nanoseconds": %[1]d}` , n )
469512 }
470513 durationTypes := []arrow.DataType {
471514 arrow .FixedWidthTypes .Duration_s ,
@@ -475,7 +518,8 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
475518 }
476519 for _ , durationType := range durationTypes {
477520 if arrow .TypeEqual (dataType , durationType ) {
478- return `123456789`
521+ n := rnd .Intn (10000000 )
522+ return fmt .Sprintf ("%d" , n )
479523 }
480524 }
481525
0 commit comments