Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
08843dd
add as_arrow_array()
paleolimbot Apr 6, 2022
6403ab4
add as_chunked_array()
paleolimbot Apr 6, 2022
696fb4f
Add as_arrow_table
paleolimbot Apr 6, 2022
7c48b24
as_record_batch()
paleolimbot Apr 6, 2022
605ea52
as_record_batch_reader()
paleolimbot Apr 6, 2022
b1438e0
as_arrow_data_type
paleolimbot Apr 6, 2022
5c0edae
as_schema()
paleolimbot Apr 6, 2022
36c7666
actually use as_arrow_table() when writing
paleolimbot Apr 6, 2022
398a6e8
cleaner errors for as_writable_table()
paleolimbot Apr 6, 2022
11ed7e3
fix typo in table test
paleolimbot Apr 11, 2022
b63a00e
update methods for data.frame (but don't use them yet by default).
paleolimbot Apr 11, 2022
4811d70
update pkgdown sections
paleolimbot Apr 11, 2022
7efab30
maybe fall back on as_chunked_array()
paleolimbot Apr 12, 2022
8211ca4
revert to previous logic vis-a-vis data.frame conversion
paleolimbot Apr 12, 2022
6cefb46
don't assume identical for returned record batches
paleolimbot Apr 12, 2022
31bd40c
fix references to glue::glue
paleolimbot Apr 13, 2022
624af4a
clang-format
paleolimbot Apr 13, 2022
ae6b946
remove commented code
paleolimbot Apr 13, 2022
347e8de
stringsAsFactors strikes again!
paleolimbot Apr 13, 2022
a43e815
first pass at falling back to S3 methods when type inference or vec_t…
paleolimbot Apr 13, 2022
54de4b8
Use RConverter API to call as_arrow_array()
paleolimbot Apr 13, 2022
9fff063
Add ability to return a status from an R code execution error
paleolimbot Apr 13, 2022
011b463
update example_with_metadata example (but keep old example for backwa…
paleolimbot Apr 13, 2022
5f0c75f
table-level metadata rabbit hole
paleolimbot Apr 13, 2022
cc986f2
Don't write metadata for extension types
paleolimbot Apr 14, 2022
7ecfa13
revert changes to metadata.R
paleolimbot Apr 14, 2022
c144a22
move sf test to the backwards compatibility tests
paleolimbot Apr 14, 2022
1a98920
test that no metadata is written for extensiontype columns
paleolimbot Apr 14, 2022
841a48e
clang-format
paleolimbot Apr 14, 2022
57b77cf
documentation
paleolimbot Apr 14, 2022
48b7b13
cpp lint
paleolimbot Apr 14, 2022
0602e2f
add test for type S3 methods
paleolimbot Apr 14, 2022
ab83953
enforce type that gets returned by as_arrow_array()
paleolimbot Apr 14, 2022
48b7a67
fix arrow available
paleolimbot Apr 14, 2022
e013352
one more arrow without arrow fix
paleolimbot Apr 14, 2022
8504840
skip record batch reader tests if datasets not available
paleolimbot Apr 14, 2022
4311432
use tibble() to avoid stringsAsFactors on R<=3.6
paleolimbot Apr 14, 2022
0d10e73
use example_with_metadata instead of times to check metadata
paleolimbot Apr 14, 2022
11e0bee
lint
paleolimbot Apr 14, 2022
601a2ed
do some explaining, clean up extra things that accumulated
paleolimbot Apr 14, 2022
5003f6a
rebase
paleolimbot Apr 19, 2022
43e6ee5
better doc for as_chunked_array()
paleolimbot Apr 18, 2022
a77a42b
type -> infer_type
paleolimbot Apr 18, 2022
5fad0fc
Update r/R/record-batch.R
paleolimbot Apr 18, 2022
2329b29
fix as_record_batch.Table()
paleolimbot Apr 18, 2022
ac9f113
better documentation for infer_type()
paleolimbot Apr 18, 2022
82f2b33
simplify error handling for as_writable_table()
paleolimbot Apr 18, 2022
b66fa28
add tests for extension types
paleolimbot Apr 19, 2022
388993b
stab at structarray conversion with extension type elements
paleolimbot Apr 19, 2022
257822c
use the documented bit for call in abort
paleolimbot Apr 19, 2022
e8d409a
better names and maybe more intuitive implementation for as_arrow_arr…
paleolimbot Apr 19, 2022
cbde188
use infer_type() instead of type() and deprecate type()
paleolimbot Apr 19, 2022
fd2f43e
add as_*() methods for Python objects
paleolimbot Apr 19, 2022
3471745
type -> infer_type in pkgdown.yml
paleolimbot Apr 19, 2022
5d8e7c0
don't lint long S3 method names
paleolimbot Apr 20, 2022
a5f0c0e
ipc_stream -> ipc-stream
paleolimbot Apr 20, 2022
82c8b82
update table/record batch tests now that metadata is identical
paleolimbot Apr 20, 2022
ec68ce0
fix Array error snapshots
paleolimbot Apr 20, 2022
a5b9313
no more examplesIf arrow_available() for files in this PR
paleolimbot Apr 20, 2022
929dc18
better documentation for as_arrow_array()
paleolimbot Apr 20, 2022
2482e1d
make sure Array$create() and as_arrow_array() do the same thing for d…
paleolimbot Apr 20, 2022
46e8467
update comments to clarify that C++ is tried first for infer_type() a…
paleolimbot Apr 20, 2022
36bab18
examples for as_record_batch() and as_arrow_table(), use RecordBatch$…
paleolimbot Apr 20, 2022
59f726f
move old_example_with_metadata to test-backwards-compatability
paleolimbot Apr 20, 2022
fd167c0
handle x where `vctrs::vec_is()` in default methods
paleolimbot Apr 20, 2022
864dfce
fix typo in test title, move vctrs stuff to the default method
paleolimbot Apr 20, 2022
5f6756b
fix roundtripping of Table with metadata/extension type
paleolimbot Apr 20, 2022
2790d1e
infer vctrs storage type based on the whole vector (not just the ptype)
paleolimbot Apr 21, 2022
d832d02
simplify if/else in as_arrow_array/infer_type default methods
paleolimbot Apr 21, 2022
560458b
link JIRA for StructArray$create() hack
paleolimbot Apr 21, 2022
edec1d5
add RecordBatchReader from batches
paleolimbot Apr 21, 2022
6859e87
add todo for column-wise construction of tables for python bridge
paleolimbot Apr 21, 2022
752e115
Don't try to write Feather V1 with VctrsExtensionType column
paleolimbot Apr 22, 2022
df42412
update test-read-files for feather v1 and for roundtripping of classe…
paleolimbot Apr 22, 2022
f4e98cf
clean up C pointers
paleolimbot Apr 22, 2022
6a6ea12
fix snapshot test files
paleolimbot Apr 22, 2022
b21126e
fix more references to the special string column
paleolimbot Apr 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ Collate:
'filesystem.R'
'flight.R'
'install-arrow.R'
'ipc_stream.R'
'ipc-stream.R'
'json.R'
'memory-pool.R'
'message.R'
Expand Down
53 changes: 50 additions & 3 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,45 @@ S3method(as.list,ArrowTabular)
S3method(as.list,Schema)
S3method(as.raw,Buffer)
S3method(as.vector,ArrowDatum)
S3method(as_arrow_array,Array)
S3method(as_arrow_array,ChunkedArray)
S3method(as_arrow_array,Scalar)
S3method(as_arrow_array,data.frame)
S3method(as_arrow_array,default)
S3method(as_arrow_array,pyarrow.lib.Array)
S3method(as_arrow_table,RecordBatch)
S3method(as_arrow_table,Table)
S3method(as_arrow_table,data.frame)
S3method(as_arrow_table,default)
S3method(as_arrow_table,pyarrow.lib.RecordBatch)
S3method(as_arrow_table,pyarrow.lib.Table)
S3method(as_chunked_array,Array)
S3method(as_chunked_array,ChunkedArray)
S3method(as_chunked_array,default)
S3method(as_chunked_array,pyarrow.lib.ChunkedArray)
S3method(as_data_type,DataType)
S3method(as_data_type,Field)
S3method(as_data_type,Schema)
S3method(as_data_type,pyarrow.lib.DataType)
S3method(as_data_type,pyarrow.lib.Field)
S3method(as_record_batch,RecordBatch)
S3method(as_record_batch,Table)
S3method(as_record_batch,data.frame)
S3method(as_record_batch,pyarrow.lib.RecordBatch)
S3method(as_record_batch,pyarrow.lib.Table)
S3method(as_record_batch_reader,Dataset)
S3method(as_record_batch_reader,RecordBatch)
S3method(as_record_batch_reader,RecordBatchReader)
S3method(as_record_batch_reader,Scanner)
S3method(as_record_batch_reader,Table)
S3method(as_record_batch_reader,arrow_dplyr_query)
S3method(as_record_batch_reader,data.frame)
S3method(as_record_batch_reader,pyarrow.lib.RecordBatch)
S3method(as_record_batch_reader,pyarrow.lib.RecordBatchReader)
S3method(as_record_batch_reader,pyarrow.lib.Table)
S3method(as_schema,Schema)
S3method(as_schema,StructType)
S3method(as_schema,pyarrow.lib.Schema)
S3method(c,Array)
S3method(c,ChunkedArray)
S3method(c,Dataset)
Expand All @@ -55,6 +94,9 @@ S3method(head,Dataset)
S3method(head,RecordBatchReader)
S3method(head,Scanner)
S3method(head,arrow_dplyr_query)
S3method(infer_type,ArrowDatum)
S3method(infer_type,Expression)
S3method(infer_type,default)
S3method(is.finite,ArrowDatum)
S3method(is.infinite,ArrowDatum)
S3method(is.na,ArrowDatum)
Expand Down Expand Up @@ -104,9 +146,6 @@ S3method(tail,Dataset)
S3method(tail,RecordBatchReader)
S3method(tail,Scanner)
S3method(tail,arrow_dplyr_query)
S3method(type,ArrowDatum)
S3method(type,Expression)
S3method(type,default)
S3method(unique,ArrowDatum)
S3method(vec_ptype_abbr,arrow_fixed_size_binary)
S3method(vec_ptype_abbr,arrow_fixed_size_list)
Expand Down Expand Up @@ -214,6 +253,13 @@ export(arrow_with_engine)
export(arrow_with_json)
export(arrow_with_parquet)
export(arrow_with_s3)
export(as_arrow_array)
export(as_arrow_table)
export(as_chunked_array)
export(as_data_type)
export(as_record_batch)
export(as_record_batch_reader)
export(as_schema)
export(binary)
export(bool)
export(boolean)
Expand Down Expand Up @@ -253,6 +299,7 @@ export(float32)
export(float64)
export(halffloat)
export(hive_partition)
export(infer_type)
export(install_arrow)
export(install_pyarrow)
export(int16)
Expand Down
133 changes: 131 additions & 2 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
#'
#' @rdname array
#' @name array
#' @examplesIf arrow_available()
#' @examples
#' my_array <- Array$create(1:10)
#' my_array$type
#' my_array$cast(int8())
Expand Down Expand Up @@ -217,6 +217,135 @@ Array$create <- function(x, type = NULL) {
Array$import_from_c <- ImportArray


#' Convert an object to an Arrow Array
#'
#' The `as_arrow_array()` function is identical to `Array$create()` except
#' that it is an S3 generic, which allows methods to be defined in other
#' packages to convert objects to [Array]. `Array$create()` is slightly faster
#' because it tries to convert in C++ before falling back on
#' `as_arrow_array()`.
#'
#' @param x An object to convert to an Arrow Array.
#' @param ... Passed to S3 methods.
#' @param type A [type][data-type] for the final Array. A value of `NULL`
#'   will default to the type guessed by [infer_type()].
#'
#' @return An [Array] with type `type`.
#' @export
#'
#' @examples
#' as_arrow_array(1:5)
#'
as_arrow_array <- function(x, ..., type = NULL) {
  UseMethod("as_arrow_array")
}

#' @export
as_arrow_array.default <- function(x, ..., type = NULL, from_vec_to_array = FALSE) {
  # When called directly (from_vec_to_array = FALSE), delegate to the internal
  # C++ converter. That converter is faster and can usually be parallelized;
  # if it cannot handle `x`, it re-enters this generic from C++ with
  # from_vec_to_array = TRUE after S3 dispatch has already failed to find a
  # more specific method.
  if (!from_vec_to_array) {
    return(vec_to_Array(x, type))
  }

  # Both the C++ conversion and S3 dispatch have come up empty. Last-ditch
  # attempt: anything that satisfies vctrs::vec_is() can be represented using
  # the vctrs extension type.
  if (vctrs::vec_is(x)) {
    if (is.null(type)) {
      return(vctrs_extension_array(x))
    }

    if (inherits(type, "VctrsExtensionType")) {
      return(
        vctrs_extension_array(
          x,
          ptype = type$ptype(),
          storage_type = type$storage_type()
        )
      )
    }
  }

  stop_cant_convert_array(x, type)
}

#' @rdname as_arrow_array
#' @export
as_arrow_array.Array <- function(x, ..., type = NULL) {
  # Already an Array: a requested target type triggers a cast;
  # otherwise return the input unchanged.
  if (!is.null(type)) {
    return(x$cast(type))
  }
  x
}

#' @rdname as_arrow_array
#' @export
as_arrow_array.Scalar <- function(x, ..., type = NULL) {
  # Materialize the scalar as a length-one Array, then route through the
  # Array method so any requested `type` cast is applied consistently.
  length_one_array <- x$as_array()
  as_arrow_array(length_one_array, ..., type = type)
}

#' @rdname as_arrow_array
#' @export
as_arrow_array.ChunkedArray <- function(x, ..., type = NULL) {
  # Flatten the chunks into a single Array, casting to `type` if one was
  # supplied. Passing the chunk list via do.call() is equivalent to splicing
  # with `!!!` because concat_arrays() collects its arguments with list2().
  do.call(concat_arrays, c(x$chunks, list(type = type)))
}

# data.frame conversion can happen in C++ when all the columns can be
# converted in C++ and when `type` is not an ExtensionType; however,
# when calling as_arrow_array(), this method will get called regardless
# of whether or not this can or can't happen.
#' @export
as_arrow_array.data.frame <- function(x, ..., type = NULL) {
  # No explicit type: guess one from the data frame's columns.
  type <- type %||% infer_type(x)

  if (inherits(type, "VctrsExtensionType")) {
    # Build the storage array first (recursing with the storage type),
    # then wrap it in the extension type.
    storage <- as_arrow_array(x, type = type$storage_type())
    new_extension_array(storage, type)
  } else if (inherits(type, "StructType")) {
    # Convert each column independently using the field types declared by
    # the target StructType, keeping the field names in sync.
    fields <- type$fields()
    names <- map_chr(fields, "name")
    types <- map(fields, "type")
    arrays <- Map(as_arrow_array, x, types)
    names(arrays) <- names

    # TODO(ARROW-16266): a hack because there is no StructArray$create() yet
    # Round-trip a RecordBatch through the Arrow C data interface to obtain
    # a StructArray with the converted columns.
    batch <- record_batch(!!! arrays)
    array_ptr <- allocate_arrow_array()
    schema_ptr <- allocate_arrow_schema()
    # Ensure the C structures are released even if export/import errors.
    on.exit({
      delete_arrow_array(array_ptr)
      delete_arrow_schema(schema_ptr)
    })

    batch$export_to_c(array_ptr, schema_ptr)
    Array$import_from_c(array_ptr, schema_ptr)
  } else {
    # A data.frame can only become a struct-like array; anything else is
    # an unsupported conversion.
    stop_cant_convert_array(x, type)
  }
}

# Shared error helper for as_arrow_array() methods. The error is attributed
# to the caller's frame (via rlang::caller_env()) so the reported call is the
# conversion entry point, not this helper.
stop_cant_convert_array <- function(x, type) {
  classes <- paste(class(x), collapse = " / ")

  message <- if (is.null(type)) {
    sprintf("Can't create Array from object of type %s", classes)
  } else {
    sprintf(
      "Can't create Array<%s> from object of type %s",
      format(type$code()),
      classes
    )
  }

  abort(message, call = rlang::caller_env())
}

#' Concatenate zero or more Arrays
#'
#' Concatenates zero or more [Array] objects into a single
Expand All @@ -231,7 +360,7 @@ Array$import_from_c <- ImportArray
#' @return A single [Array]
#' @export
#'
#' @examplesIf arrow_available()
#' @examples
#' concat_arrays(Array$create(1:3), Array$create(4:5))
concat_arrays <- function(..., type = NULL) {
dots <- lapply(list2(...), Array$create, type = type)
Expand Down
8 changes: 8 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 45 additions & 1 deletion r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
#' @rdname ChunkedArray
#' @name ChunkedArray
#' @seealso [Array]
#' @examplesIf arrow_available()
#' @examples
#' # Pass items into chunked_array as separate objects to create chunks
#' class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
#' class_scores$num_chunks
Expand Down Expand Up @@ -170,3 +170,47 @@ c.ChunkedArray <- function(...) {
#' @rdname ChunkedArray
#' @export
chunked_array <- ChunkedArray$create

#' Convert an object to an Arrow ChunkedArray
#'
#' Whereas [chunked_array()] constructs a [ChunkedArray] from zero or more
#' [Array]s or R vectors, `as_chunked_array()` converts a single object to a
#' [ChunkedArray].
#'
#' @param x An object to convert to an Arrow ChunkedArray.
#' @inheritParams as_arrow_array
#'
#' @return A [ChunkedArray].
#' @export
#'
#' @examples
#' as_chunked_array(1:5)
#'
as_chunked_array <- function(x, ..., type = NULL) {
  UseMethod("as_chunked_array")
}

#' @rdname as_chunked_array
#' @export
as_chunked_array.ChunkedArray <- function(x, ..., type = NULL) {
  # Already a ChunkedArray: cast when a target type is requested,
  # otherwise return the input unchanged.
  if (!is.null(type)) {
    return(x$cast(type))
  }
  x
}

#' @rdname as_chunked_array
#' @export
as_chunked_array.Array <- function(x, ..., type = NULL) {
  # Cast first when a target type is requested, then wrap the (possibly
  # cast) Array as a single-chunk ChunkedArray.
  if (!is.null(type)) {
    x <- x$cast(type)
  }
  chunked_array(x)
}

#' @export
as_chunked_array.default <- function(x, ..., type = NULL) {
  # Fall back on ChunkedArray$create(), forwarding `type` so that an
  # explicitly requested target type is honoured. (Previously `type` was
  # accepted by the generic but silently ignored here.)
  ChunkedArray$create(x, type = type)
}
8 changes: 7 additions & 1 deletion r/R/dplyr-funcs-datetime.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,13 @@ register_bindings_datetime <- function() {
})
register_binding("tz", function(x) {
if (!call_binding("is.POSIXct", x)) {
abort(paste0("timezone extraction for objects of class `", type(x)$ToString(), "` not supported in Arrow"))
abort(
paste0(
"timezone extraction for objects of class `",
infer_type(x)$ToString(),
"` not supported in Arrow"
)
)
}

x$type()$timezone()
Expand Down
16 changes: 9 additions & 7 deletions r/R/extension.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,13 @@ ExtensionType <- R6Class("ExtensionType",
} else if (inherits(extension_array, "ExtensionArray")) {
extension_array$storage()$as_vector()
} else {
classes <- paste(class(extension_array), collapse = " / ")
abort(
c(
"`extension_array` must be a ChunkedArray or ExtensionArray",
i = glue::glue("Got object of type {classes}")
i = sprintf(
"Got object of type %s",
paste(class(extension_array), collapse = " / ")
)
)
)
}
Expand Down Expand Up @@ -309,7 +311,7 @@ ExtensionType$create <- function(storage_type,
#' and `reregister_extension_type()` return `NULL`, invisibly.
#' @export
#'
#' @examplesIf arrow_available()
#' @examples
#' # Create the R6 type whose methods control how Array objects are
#' # converted to R objects, how equality between types is computed,
#' # and how types are printed.
Expand Down Expand Up @@ -509,7 +511,7 @@ VctrsExtensionType <- R6Class("VctrsExtensionType",
#' extension name "arrow.r.vctrs".
#' @export
#'
#' @examplesIf arrow_available()
#' @examples
#' (array <- vctrs_extension_array(as.POSIXlt("2022-01-02 03:45", tz = "UTC")))
#' array$type
#' as.vector(array)
Expand All @@ -532,9 +534,9 @@ vctrs_extension_array <- function(x, ptype = vctrs::vec_ptype(x),

#' @rdname vctrs_extension_array
#' @export
vctrs_extension_type <- function(ptype,
storage_type = type(vctrs::vec_data(ptype))) {
ptype <- vctrs::vec_ptype(ptype)
vctrs_extension_type <- function(x,
storage_type = infer_type(vctrs::vec_data(x))) {
ptype <- vctrs::vec_ptype(x)

new_extension_type(
storage_type = storage_type,
Expand Down
6 changes: 1 addition & 5 deletions r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,7 @@ write_feather <- function(x,
compression <- compression_from_name(compression)

x_out <- x
if (is.data.frame(x) || inherits(x, "RecordBatch")) {
x <- Table$create(x)
}

assert_that(is_writable_table(x))
x <- as_writable_table(x)

if (!inherits(sink, "OutputStream")) {
sink <- make_output_stream(sink)
Expand Down
Loading