@@ -5,44 +5,53 @@ import (
55
66 "github.com/apache/arrow/go/v13/arrow"
77 "github.com/cloudquery/plugin-sdk/v3/types"
8+ "golang.org/x/exp/slices"
89)
910
10- type listLike interface {
11- arrow.DataType
12- Elem () arrow.DataType
11+ func transformSchemaForWriting ( sc * arrow. Schema ) * arrow. Schema {
12+ md := arrow .MetadataFrom ( sc . Metadata (). ToMap ())
13+ return arrow .NewSchema ( transformFieldsForWriting ( sc . Fields ()), & md )
1314}
1415
15- func transformSchemaForWriting (sc * arrow.Schema ) * arrow.Schema {
16- fields := sc .Fields ()
16+ func transformFieldsForWriting (fields []arrow.Field ) []arrow.Field {
1717 for i := range fields {
1818 fields [i ].Type = transformTypeForWriting (fields [i ].Type )
1919 }
20- md := sc .Metadata ()
21- return arrow .NewSchema (fields , & md )
20+ return fields
2221}
2322
2423func transformTypeForWriting (dt arrow.DataType ) arrow.DataType {
2524 switch dt := dt .(type ) {
26- case listLike :
27- return arrow .ListOf ( transformTypeForWriting (dt .Elem ()))
25+ case * arrow. StructType :
26+ return arrow .StructOf ( transformFieldsForWriting (dt .Fields ())... )
2827 case * arrow.MapType :
29- return arrow .ListOf (transformTypeForWriting (dt .ValueType ()))
30- }
31-
32- switch dt := duckDBToArrow (arrowToDuckDB (dt )).(type ) {
28+ return arrow .MapOf (transformTypeForWriting (dt .KeyType ()), transformTypeForWriting (dt .ItemType ()))
29+ case arrow.ListLikeType :
30+ return arrow .ListOf (transformTypeForWriting (dt .Elem ()))
3331 case * types.UUIDType , * types.JSONType :
3432 return arrow .BinaryTypes .String
3533 default :
36- return dt
34+ return duckDBToArrow ( arrowToDuckDB ( dt ))
3735 }
3836}
3937
4038func arrowToDuckDB (dt arrow.DataType ) string {
4139 switch dt := dt .(type ) {
42- case listLike :
43- return arrowToDuckDB (dt .Elem ()) + "[]"
40+ case * arrow.StructType :
41+ builder := new (strings.Builder )
42+ builder .WriteString ("struct(" )
43+ for i , field := range dt .Fields () {
44+ if i > 0 {
45+ builder .WriteString (", " )
46+ }
47+ builder .WriteString (sanitizeID (field .Name ) + " " + arrowToDuckDB (field .Type ))
48+ }
49+ builder .WriteString (")" )
50+ return builder .String ()
4451 case * arrow.MapType :
45- return arrowToDuckDB (arrow .ListOf (dt .ValueType ()))
52+ return "map(" + arrowToDuckDB (dt .KeyType ()) + ", " + arrowToDuckDB (dt .ItemType ()) + ")"
53+ case arrow.ListLikeType :
54+ return arrowToDuckDB (dt .Elem ()) + "[]"
4655 case * arrow.BooleanType :
4756 return "boolean"
4857 case * arrow.Int8Type :
@@ -79,20 +88,21 @@ func arrowToDuckDB(dt arrow.DataType) string {
7988 return "date"
8089 case * arrow.DayTimeIntervalType :
8190 return "interval"
82- case * arrow.StructType :
83- return "json"
8491 default :
8592 return "varchar"
8693 }
8794}
8895
8996func duckDBToArrow (t string ) arrow.DataType {
90- if strings .HasSuffix (t , "[]" ) {
97+ switch {
98+ case strings .HasSuffix (t , "[]" ):
9199 return arrow .ListOf (duckDBToArrow (strings .TrimSuffix (t , "[]" )))
100+ case strings .HasPrefix (t , "struct" ):
101+ return duckDBStructToArrow (t )
102+ case strings .HasPrefix (t , "map" ):
103+ return duckDBMapToArrow (t )
92104 }
93- if strings .HasPrefix (t , "struct" ) {
94- return types .ExtensionTypes .JSON
95- }
105+
96106 switch t {
97107 case "tinyint" , "int1" :
98108 return arrow .PrimitiveTypes .Int8
@@ -133,6 +143,74 @@ func duckDBToArrow(t string) arrow.DataType {
133143 }
134144}
135145
146+ func duckDBStructToArrow (spec string ) * arrow.StructType {
147+ params := strings .TrimPrefix (spec , "struct" )
148+ params = strings .TrimSpace (params )
149+ params = strings .TrimSuffix (strings .TrimPrefix (params , "(" ), ")" )
150+
151+ fieldsSpec := splitParams (params )
152+ if len (fieldsSpec ) == 0 {
153+ panic ("unsupported struct spec: " + spec )
154+ }
155+
156+ fields := make ([]arrow.Field , len (fieldsSpec ))
157+ for i , fieldSpec := range fieldsSpec {
158+ parts := strings .SplitN (fieldSpec , " " , 2 )
159+ if len (parts ) != 2 {
160+ panic ("unsupported field spec: " + fieldSpec )
161+ }
162+
163+ fields [i ] = arrow.Field {
164+ Name : strings .Trim (parts [0 ], `"` ),
165+ Type : duckDBToArrow (strings .TrimSpace (parts [1 ])),
166+ Nullable : true , // all duckdb columns are nullable
167+ }
168+ }
169+
170+ return arrow .StructOf (fields ... )
171+ }
172+
173+ func duckDBMapToArrow (spec string ) * arrow.MapType {
174+ params := strings .TrimPrefix (spec , "map" )
175+ params = strings .TrimSpace (params )
176+ params = strings .TrimSuffix (strings .TrimPrefix (params , "(" ), ")" )
177+
178+ kv := splitParams (params )
179+ if len (kv ) != 2 {
180+ panic ("unsupported map spec: " + spec )
181+ }
182+
183+ // these should only be types
184+ return arrow .MapOf (duckDBToArrow (kv [0 ]), duckDBToArrow (kv [1 ]))
185+ }
186+
187+ func splitParams (params string ) []string {
188+ params = strings .TrimSpace (params )
189+
190+ var brackets int
191+ var parts []string
192+ elem := make ([]rune , 0 , len (params ))
193+
194+ for _ , r := range params {
195+ switch r {
196+ case '(' :
197+ brackets ++
198+ case ')' :
199+ brackets --
200+ case ',' :
201+ if brackets == 0 {
202+ parts = append (parts , strings .TrimSpace (string (elem )))
203+ elem = elem [:0 ] // cleanup
204+ continue
205+ }
206+ }
207+ elem = append (elem , r )
208+ }
209+ parts = append (parts , strings .TrimSpace (string (elem )))
210+
211+ return slices .Clip (parts )
212+ }
213+
136214func sanitizeID (id string ) string {
137215 return `"` + id + `"`
138216}
0 commit comments