Skip to content

Commit b211c18

Browse files
authored
GH-35415: [Python] RecordBatch string representation includes column preview (#35416)
### Rationale for this change

Table and RecordBatch now share a common parent class, so their common APIs should behave the same.

### What changes are included in this PR?

Remove the override of the RecordBatch string representation.

### Are these changes tested?

Pytests and doctests updated.

### Are there any user-facing changes?

Yes, the string representation of `RecordBatch` now includes a preview of the column values in addition to the schema.

* Closes: #35415

Authored-by: Dane Pitkin <[email protected]>
Signed-off-by: Alenka Frim <[email protected]>
1 parent fcf934a commit b211c18

File tree

4 files changed

+64
-7
lines changed

4 files changed

+64
-7
lines changed

python/pyarrow/array.pxi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,9 @@ cdef class _PandasConvertible(_Weakrefable):
811811
pyarrow.RecordBatch
812812
n_legs: int64
813813
animals: string
814+
----
815+
n_legs: [2,4,5,100]
816+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
814817
>>> batch.to_pandas()
815818
n_legs animals
816819
0 2 Flamingo

python/pyarrow/ipc.pxi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,8 +645,12 @@ cdef class RecordBatchReader(_Weakrefable):
645645
... print(batch)
646646
pyarrow.RecordBatch
647647
x: int64
648+
----
649+
x: [1,2,3]
648650
pyarrow.RecordBatch
649651
x: int64
652+
----
653+
x: [1,2,3]
650654
"""
651655

652656
# cdef block is in lib.pxd

python/pyarrow/table.pxi

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,9 @@ cdef class RecordBatch(_Tabular):
15941594
pyarrow.RecordBatch
15951595
n_legs: int64
15961596
animals: string
1597+
----
1598+
n_legs: [2,2,4,4,5,100]
1599+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
15971600
>>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas()
15981601
n_legs animals
15991602
0 2 Flamingo
@@ -1618,6 +1621,12 @@ cdef class RecordBatch(_Tabular):
16181621
day: int64
16191622
n_legs: int64
16201623
animals: string
1624+
----
1625+
year: [2020,2022,2021,2022]
1626+
month: [3,5,7,9]
1627+
day: [1,5,9,13]
1628+
n_legs: [2,4,5,100]
1629+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
16211630
>>> pa.RecordBatch.from_pandas(df).to_pandas()
16221631
year month day n_legs animals
16231632
0 2020 3 1 2 Flamingo
@@ -1652,6 +1661,12 @@ cdef class RecordBatch(_Tabular):
16521661
day: int64
16531662
n_legs: int64
16541663
animals: string
1664+
----
1665+
year: [2020,2022,2021,2022]
1666+
month: [3,5,7,9]
1667+
day: [1,5,9,13]
1668+
n_legs: [2,4,5,100]
1669+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
16551670
"""
16561671

16571672
def __cinit__(self):
@@ -1733,6 +1748,9 @@ cdef class RecordBatch(_Tabular):
17331748
pyarrow.RecordBatch
17341749
n_legs: int64
17351750
animals: string
1751+
----
1752+
n_legs: [2,2,4,4,5,100]
1753+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
17361754
>>> pa.RecordBatch.from_pydict(pydict).to_pandas()
17371755
n_legs animals
17381756
0 2 Flamingo
@@ -1788,6 +1806,10 @@ cdef class RecordBatch(_Tabular):
17881806
pyarrow.RecordBatch
17891807
n_legs: int64
17901808
animals: string
1809+
----
1810+
n_legs: [2,4]
1811+
animals: ["Flamingo","Dog"]
1812+
17911813
>>> pa.RecordBatch.from_pylist(pylist).to_pandas()
17921814
n_legs animals
17931815
0 2 Flamingo
@@ -1820,12 +1842,6 @@ cdef class RecordBatch(_Tabular):
18201842
except TypeError:
18211843
return NotImplemented
18221844

1823-
def __repr__(self):
1824-
# TODO remove this and update pytests/doctests for
1825-
# RecordBatch.to_string(preview_cols=10) usage in
1826-
# parent class
1827-
return self.to_string()
1828-
18291845
def validate(self, *, full=False):
18301846
"""
18311847
Perform validation checks. An exception is raised if validation fails.
@@ -2402,12 +2418,16 @@ cdef class RecordBatch(_Tabular):
24022418
>>> batch.select([1])
24032419
pyarrow.RecordBatch
24042420
animals: string
2421+
----
2422+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
24052423
24062424
Select columns by names:
24072425
24082426
>>> batch.select(["n_legs"])
24092427
pyarrow.RecordBatch
24102428
n_legs: int64
2429+
----
2430+
n_legs: [2,2,4,4,5,100]
24112431
"""
24122432
cdef:
24132433
shared_ptr[CRecordBatch] c_batch
@@ -2558,6 +2578,12 @@ cdef class RecordBatch(_Tabular):
25582578
day: int64
25592579
n_legs: int64
25602580
animals: string
2581+
----
2582+
year: [2020,2022,2021,2022]
2583+
month: [3,5,7,9]
2584+
day: [1,5,9,13]
2585+
n_legs: [2,4,5,100]
2586+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
25612587
25622588
Convert pandas DataFrame to RecordBatch using schema:
25632589
@@ -2569,12 +2595,17 @@ cdef class RecordBatch(_Tabular):
25692595
pyarrow.RecordBatch
25702596
n_legs: int64
25712597
animals: string
2598+
----
2599+
n_legs: [2,4,5,100]
2600+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
25722601
25732602
Convert pandas DataFrame to RecordBatch specifying columns:
25742603
25752604
>>> pa.RecordBatch.from_pandas(df, columns=["n_legs"])
25762605
pyarrow.RecordBatch
25772606
n_legs: int64
2607+
----
2608+
n_legs: [2,4,5,100]
25782609
"""
25792610
from pyarrow.pandas_compat import dataframe_to_arrays
25802611
arrays, schema, n_rows = dataframe_to_arrays(
@@ -2622,6 +2653,9 @@ cdef class RecordBatch(_Tabular):
26222653
pyarrow.RecordBatch
26232654
n_legs: int64
26242655
animals: string
2656+
----
2657+
n_legs: [2,2,4,4,5,100]
2658+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
26252659
>>> pa.RecordBatch.from_arrays([n_legs, animals], names=names).to_pandas()
26262660
n_legs animals
26272661
0 2 Flamingo
@@ -5063,6 +5097,9 @@ def record_batch(data, names=None, schema=None, metadata=None):
50635097
pyarrow.RecordBatch
50645098
n_legs: int64
50655099
animals: string
5100+
----
5101+
n_legs: [2,2,4,4,5,100]
5102+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
50665103
>>> pa.record_batch([n_legs, animals], names=["n_legs", "animals"]).to_pandas()
50675104
n_legs animals
50685105
0 2 Flamingo
@@ -5081,6 +5118,9 @@ def record_batch(data, names=None, schema=None, metadata=None):
50815118
pyarrow.RecordBatch
50825119
n_legs: int64
50835120
animals: string
5121+
----
5122+
n_legs: [2,2,4,4,5,100]
5123+
animals: ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]
50845124
>>> pa.record_batch([n_legs, animals],
50855125
... names=names,
50865126
... metadata = my_metadata).schema
@@ -5104,6 +5144,13 @@ def record_batch(data, names=None, schema=None, metadata=None):
51045144
day: int64
51055145
n_legs: int64
51065146
animals: string
5147+
----
5148+
year: [2020,2022,2021,2022]
5149+
month: [3,5,7,9]
5150+
day: [1,5,9,13]
5151+
n_legs: [2,4,5,100]
5152+
animals: ["Flamingo","Horse","Brittle stars","Centipede"]
5153+
51075154
>>> pa.record_batch(df).to_pandas()
51085155
year month day n_legs animals
51095156
0 2020 3 1 2 Flamingo

python/pyarrow/tests/test_table.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,10 @@ def test_recordbatch_basics():
529529
assert batch.schema == schema
530530
assert str(batch) == """pyarrow.RecordBatch
531531
c0: int16
532-
c1: int32"""
532+
c1: int32
533+
----
534+
c0: [0,1,2,3,4]
535+
c1: [-10,-5,0,null,10]"""
533536

534537
assert batch.to_string(show_metadata=True) == """\
535538
pyarrow.RecordBatch

0 commit comments

Comments
 (0)