Skip to content

Commit 9a3b4ad

Browse files
authored
fix: Test Decimal type, map type and larger number ranges (#905)
This adds:
- support for testing decimal types
- support for testing maps (only string and int keys for now)
- randomization of values based on a given seed. Because a seed is used, the tests are still 100% reproducible, but it allows us to test values closer to the min/max of integer ranges and gives us different keys to test in maps.

I found at least 4 surprising bugs in the Elasticsearch implementation through this. Destinations that don't want to support these features right now can still opt to skip the types.
1 parent ada8994 commit 9a3b4ad

File tree

3 files changed

+74
-24
lines changed

3 files changed

+74
-24
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/cloudquery/plugin-sdk/v3
33
go 1.19
44

55
require (
6-
github.com/apache/arrow/go/v13 v13.0.0-20230509040948-de6c3cd2b604
6+
github.com/apache/arrow/go/v13 v13.0.0-20230525142029-2d32efeedad8
77
github.com/bradleyjkemp/cupaloy/v2 v2.8.0
88
github.com/cloudquery/plugin-pb-go v1.0.8
99
github.com/cloudquery/plugin-sdk/v2 v2.7.0

plugins/destination/plugin_testing.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ func WithTestSourceSkipLargeTypes() func(o *PluginTestSuiteRunnerOptions) {
171171
}
172172
}
173173

174+
func WithTestSourceSkipDecimals() func(o *PluginTestSuiteRunnerOptions) {
175+
return func(o *PluginTestSuiteRunnerOptions) {
176+
o.SkipDecimals = true
177+
}
178+
}
179+
174180
func PluginTestSuiteRunner(t *testing.T, newPlugin NewPluginFunc, destSpec specs.Destination, tests PluginTestSuiteTests, testOptions ...func(o *PluginTestSuiteRunnerOptions)) {
175181
t.Helper()
176182
destSpec.Name = "testsuite"

schema/testdata.go

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package schema
22

33
import (
4+
"encoding/base64"
45
"fmt"
56
"reflect"
67
"sort"
@@ -13,20 +14,22 @@ import (
1314
"github.com/apache/arrow/go/v13/arrow/memory"
1415
"github.com/cloudquery/plugin-sdk/v3/types"
1516
"github.com/google/uuid"
17+
"golang.org/x/exp/rand"
1618
"golang.org/x/exp/slices"
1719
)
1820

1921
// TestSourceOptions controls which types are included by TestSourceColumns.
// Fields are kept in alphabetical order. The zero value skips nothing.
type TestSourceOptions struct {
	SkipDates      bool
	SkipDecimals   bool // decimal types (a Decimal128 column with precision 19, scale 10)
	SkipDurations  bool
	SkipIntervals  bool
	SkipLargeTypes bool // e.g. large binary, large string
	SkipLists      bool // lists of all primitive types. Lists that were supported by CQTypes are always included.
	SkipMaps       bool // maps of the basic columns (string and int keys only for now)
	SkipStructs    bool
	SkipTimes      bool // time of day types
	SkipTimestamps bool // timestamp types. Microsecond timestamp is always included, regardless of this setting.
	TimePrecision  time.Duration
}
3235

@@ -57,6 +60,10 @@ func TestSourceColumns(testOpts TestSourceOptions) []Column {
5760
// we don't support float16 right now
5861
basicColumns = removeColumnsByType(basicColumns, arrow.FLOAT16)
5962

63+
if !testOpts.SkipDecimals {
64+
basicColumns = append(basicColumns, Column{Name: "decimal", Type: &arrow.Decimal128Type{Precision: 19, Scale: 10}})
65+
}
66+
6067
if testOpts.SkipTimestamps {
6168
// for backwards-compatibility, microsecond timestamps are not removed here
6269
basicColumns = removeColumnsByDataType(basicColumns, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "UTC"})
@@ -96,9 +103,9 @@ func TestSourceColumns(testOpts TestSourceOptions) []Column {
96103
compositeColumns = append(compositeColumns, listOfColumns(basicColumnsWithExclusions)...)
97104
}
98105

99-
// if !opts.SkipMaps {
100-
// compositeColumns = append(compositeColumns, mapOfColumns(basicColumnsWithExclusions)...)
101-
// }
106+
if !testOpts.SkipMaps {
107+
compositeColumns = append(compositeColumns, mapOfColumns(basicColumnsWithExclusions)...)
108+
}
102109

103110
// add JSON later, we don't want to include it as a list or map right now (it causes complications with JSON unmarshalling)
104111
basicColumns = append(basicColumns, Column{Name: "json", Type: types.NewJSONType()})
@@ -214,9 +221,12 @@ func listOfColumns(baseColumns []Column) []Column {
214221
// mapOfColumns returns a list of columns that are maps of the given columns.
215222
// nolint:unused
216223
func mapOfColumns(baseColumns []Column) []Column {
217-
columns := make([]Column, len(baseColumns))
218-
for i := 0; i < len(baseColumns); i++ {
219-
columns[i] = Column{Name: baseColumns[i].Name + "_map", Type: arrow.MapOf(baseColumns[i].Type, baseColumns[i].Type)}
224+
columns := make([]Column, len(baseColumns)*2)
225+
for i := 0; i < len(columns); i += 2 {
226+
// we focus on string and int keys for now
227+
n := i / 2
228+
columns[i] = Column{Name: "int_" + baseColumns[n].Name + "_map", Type: arrow.MapOf(arrow.BinaryTypes.String, baseColumns[n].Type)}
229+
columns[i+1] = Column{Name: "string_" + baseColumns[n].Name + "_map", Type: arrow.MapOf(arrow.PrimitiveTypes.Int64, baseColumns[n].Type)}
220230
}
221231
return columns
222232
}
@@ -260,6 +270,7 @@ type GenTestDataOptions struct {
260270
// StableTime is the time to use for all rows other than sync time. If set to time.Time{}, a new time will be generated
261271
StableTime time.Time
262272
TimePrecision time.Duration
273+
Seed int64
263274
}
264275

265276
// GenTestData generates a slice of arrow.Records with the given schema and options.
@@ -299,12 +310,18 @@ func GenTestData(table *Table, opts GenTestDataOptions) []arrow.Record {
299310
}
300311

301312
func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOptions) string {
313+
src := rand.NewSource(uint64(opts.Seed))
314+
rnd := rand.New(src)
315+
302316
// handle lists (including maps)
303317
if arrow.IsListLike(dataType.ID()) {
304318
if dataType.ID() == arrow.MAP {
305319
k := getExampleJSON(colName, dataType.(*arrow.MapType).KeyType(), opts)
306320
v := getExampleJSON(colName, dataType.(*arrow.MapType).ItemType(), opts)
307-
return fmt.Sprintf(`[{"key": %s,"value": %s}]`, k, v)
321+
opts.Seed++
322+
k2 := getExampleJSON(colName, dataType.(*arrow.MapType).KeyType(), opts)
323+
v2 := getExampleJSON(colName, dataType.(*arrow.MapType).ItemType(), opts)
324+
return fmt.Sprintf(`[{"key": %s,"value": %s},{"key": %s,"value": %s}]`, k, v, k2, v2)
308325
}
309326
inner := dataType.(*arrow.ListType).Elem()
310327
return `[` + getExampleJSON(colName, inner, opts) + `,null,` + getExampleJSON(colName, inner, opts) + `]`
@@ -332,26 +349,47 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
332349

333350
// handle signed integers
334351
if arrow.IsSignedInteger(dataType.ID()) {
335-
return "-1"
352+
switch dataType {
353+
case arrow.PrimitiveTypes.Int8:
354+
return fmt.Sprintf("-%d", rnd.Intn(int(^uint8(0)>>1)))
355+
case arrow.PrimitiveTypes.Int16:
356+
return fmt.Sprintf("-%d", rnd.Intn(int(^uint16(0)>>1)))
357+
case arrow.PrimitiveTypes.Int32:
358+
return fmt.Sprintf("-%d", rnd.Intn(int(^uint32(0)>>1)))
359+
case arrow.PrimitiveTypes.Int64:
360+
return fmt.Sprintf("-%d", rnd.Int63n(int64(^uint64(0)>>1)))
361+
}
336362
}
337363

338364
// handle unsigned integers
339365
if arrow.IsUnsignedInteger(dataType.ID()) {
340-
return "1"
366+
switch dataType {
367+
case arrow.PrimitiveTypes.Uint8:
368+
return fmt.Sprintf("%d", rnd.Uint64n(uint64(^uint8(0))))
369+
case arrow.PrimitiveTypes.Uint16:
370+
return fmt.Sprintf("%d", rnd.Uint64n(uint64(^uint16(0))))
371+
case arrow.PrimitiveTypes.Uint32:
372+
return fmt.Sprintf("%d", rnd.Uint64n(uint64(^uint32(0))))
373+
case arrow.PrimitiveTypes.Uint64:
374+
return fmt.Sprintf("%d", rnd.Uint64())
375+
}
341376
}
342377

343378
// handle floats
344379
if arrow.IsFloating(dataType.ID()) {
345-
return "1.1"
380+
return fmt.Sprintf("%d.%d", rnd.Intn(1e3), rnd.Intn(1e3))
346381
}
347382

348383
// handle decimals
349384
if arrow.IsDecimal(dataType.ID()) {
350-
return "1.1"
385+
return fmt.Sprintf("%d.%d", rnd.Int63n(1e9), rnd.Int63n(1e10))
351386
}
352387

353388
// handle booleans
354389
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.Boolean) {
390+
if rnd.Intn(2) == 0 {
391+
return "false"
392+
}
355393
return "true"
356394
}
357395

@@ -365,7 +403,8 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
365403
if colName == CqSourceNameColumn.Name {
366404
return `"` + opts.SourceName + `"`
367405
}
368-
return `"AString"`
406+
n := rnd.Intn(100000)
407+
return fmt.Sprintf(`"AString%d"`, n)
369408
}
370409
}
371410

@@ -376,7 +415,9 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
376415
}
377416
for _, binaryType := range binaryTypes {
378417
if arrow.TypeEqual(dataType, binaryType) {
379-
return `"AQIDBA=="` // base64 encoded 0x01, 0x02, 0x03, 0x04
418+
bytes := make([]byte, 4)
419+
rnd.Read(bytes)
420+
return `"` + base64.StdEncoding.EncodeToString(bytes) + `"`
380421
}
381422
}
382423

@@ -450,22 +491,24 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
450491

451492
// handle date types
452493
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.Date32) {
453-
return `19471`
494+
return fmt.Sprintf("%d", 19471+rnd.Intn(100))
454495
}
455496
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.Date64) {
456-
ms := 19471 * 86400000
497+
ms := (19471 + rnd.Intn(100)) * 86400000
457498
return fmt.Sprintf("%d", ms)
458499
}
459500

460501
// handle duration and interval types
461502
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.DayTimeInterval) {
462-
return `{"days": 1, "milliseconds": 1}`
503+
n := rnd.Intn(10000)
504+
return fmt.Sprintf(`{"days": %[1]d, "milliseconds": %[1]d}`, n)
463505
}
464506
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.MonthInterval) {
465507
return `{"months": 1}`
466508
}
467509
if arrow.TypeEqual(dataType, arrow.FixedWidthTypes.MonthDayNanoInterval) {
468-
return `{"months": 1, "days": 1, "nanoseconds": 1}`
510+
n := rnd.Intn(10000)
511+
return fmt.Sprintf(`{"months": %[1]d, "days": %[1]d, "nanoseconds": %[1]d}`, n)
469512
}
470513
durationTypes := []arrow.DataType{
471514
arrow.FixedWidthTypes.Duration_s,
@@ -475,7 +518,8 @@ func getExampleJSON(colName string, dataType arrow.DataType, opts GenTestDataOpt
475518
}
476519
for _, durationType := range durationTypes {
477520
if arrow.TypeEqual(dataType, durationType) {
478-
return `123456789`
521+
n := rnd.Intn(10000000)
522+
return fmt.Sprintf("%d", n)
479523
}
480524
}
481525

0 commit comments

Comments
 (0)