@@ -99,53 +99,6 @@ def test_offset_of_sliced_array():
9999 # check_index=False, check_names=False)
100100
101101
102- # Currently errors due to string conversion
103- # as col.size is called as a property not method in pandas
104- # see L255-L257 in pandas/core/interchange/from_dataframe.py
105- @pytest .mark .pandas
106- def test_categorical_roundtrip ():
107- pytest .skip ("Bug in pandas implementation" )
108-
109- if Version (pd .__version__ ) < Version ("1.5.0" ):
110- pytest .skip ("__dataframe__ added to pandas in 1.5.0" )
111-
112- arr = ["Mon" , "Tue" , "Mon" , "Wed" , "Mon" , "Thu" , "Fri" , "Sat" , "Sun" ]
113- table = pa .table (
114- {"weekday" : pa .array (arr ).dictionary_encode ()}
115- )
116-
117- pandas_df = table .to_pandas ()
118- result = pi .from_dataframe (pandas_df )
119-
120- # Checking equality for the values
121- # As the dtype of the indices is changed from int32 in pa.Table
122- # to int64 in pandas interchange protocol implementation
123- assert result [0 ].chunk (0 ).dictionary == table [0 ].chunk (0 ).dictionary
124-
125- table_protocol = table .__dataframe__ ()
126- result_protocol = result .__dataframe__ ()
127-
128- assert table_protocol .num_columns () == result_protocol .num_columns ()
129- assert table_protocol .num_rows () == result_protocol .num_rows ()
130- assert table_protocol .num_chunks () == result_protocol .num_chunks ()
131- assert table_protocol .column_names () == result_protocol .column_names ()
132-
133- col_table = table_protocol .get_column (0 )
134- col_result = result_protocol .get_column (0 )
135-
136- assert col_result .dtype [0 ] == DtypeKind .CATEGORICAL
137- assert col_result .dtype [0 ] == col_table .dtype [0 ]
138- assert col_result .size == col_table .size
139- assert col_result .offset == col_table .offset
140-
141- desc_cat_table = col_result .describe_categorical
142- desc_cat_result = col_result .describe_categorical
143-
144- assert desc_cat_table ["is_ordered" ] == desc_cat_result ["is_ordered" ]
145- assert desc_cat_table ["is_dictionary" ] == desc_cat_result ["is_dictionary" ]
146- assert isinstance (desc_cat_result ["categories" ]._col , pa .Array )
147-
148-
149102@pytest .mark .pandas
150103@pytest .mark .parametrize (
151104 "uint" , [pa .uint8 (), pa .uint16 (), pa .uint32 ()]
@@ -170,6 +123,7 @@ def test_pandas_roundtrip(uint, int, float, np_float):
170123 "a" : pa .array (arr , type = uint ),
171124 "b" : pa .array (arr , type = int ),
172125 "c" : pa .array (np .array (arr , dtype = np_float ), type = float ),
126+ "d" : [True , False , True ],
173127 }
174128 )
175129 from pandas .api .interchange import (
@@ -189,10 +143,10 @@ def test_pandas_roundtrip(uint, int, float, np_float):
189143
190144
191145@pytest .mark .pandas
192- def test_roundtrip_pandas_string ():
146+ def test_pandas_roundtrip_string ():
193147 # See https://github.com/pandas-dev/pandas/issues/50554
194148 if Version (pd .__version__ ) < Version ("1.6" ):
195- pytest .skip (" Column.size() called as a method in pandas 2.0.0 " )
149+ pytest .skip ("Column.size() bug in pandas" )
196150
197151 arr = ["a" , "" , "c" ]
198152 table = pa .table ({"a" : pa .array (arr )})
@@ -218,10 +172,10 @@ def test_roundtrip_pandas_string():
218172
219173
220174@pytest .mark .pandas
221- def test_roundtrip_pandas_large_string ():
175+ def test_pandas_roundtrip_large_string ():
222176 # See https://github.com/pandas-dev/pandas/issues/50554
223177 if Version (pd .__version__ ) < Version ("1.6" ):
224- pytest .skip (" Column.size() called as a method in pandas 2.0.0 " )
178+ pytest .skip ("Column.size() bug in pandas" )
225179
226180 arr = ["a" , "" , "c" ]
227181 table = pa .table ({"a_large" : pa .array (arr , type = pa .large_string ())})
@@ -255,10 +209,10 @@ def test_roundtrip_pandas_large_string():
255209
256210
257211@pytest .mark .pandas
258- def test_roundtrip_pandas_string_with_missing ():
212+ def test_pandas_roundtrip_string_with_missing ():
259213 # See https://github.com/pandas-dev/pandas/issues/50554
260214 if Version (pd .__version__ ) < Version ("1.6" ):
261- pytest .skip (" Column.size() called as a method in pandas 2.0.0 " )
215+ pytest .skip ("Column.size() bug in pandas" )
262216
263217 arr = ["a" , "" , "c" , None ]
264218 table = pa .table ({"a" : pa .array (arr ),
@@ -287,19 +241,28 @@ def test_roundtrip_pandas_string_with_missing():
287241
288242
289243@pytest .mark .pandas
290- def test_roundtrip_pandas_boolean ():
291- if Version (pd .__version__ ) < Version ("1.5.0 " ):
292- pytest .skip ("__dataframe__ added to pandas in 1.5.0 " )
244+ def test_pandas_roundtrip_categorical ():
245+ if Version (pd .__version__ ) < Version ("2.0.2 " ):
246+ pytest .skip ("Bitmasks not supported in pandas interchange implementation " )
293247
294- table = pa .table ({"a" : [True , False , True ]})
248+ arr = ["Mon" , "Tue" , "Mon" , "Wed" , "Mon" , "Thu" , "Fri" , "Sat" , None ]
249+ table = pa .table (
250+ {"weekday" : pa .array (arr ).dictionary_encode ()}
251+ )
295252
296253 from pandas .api .interchange import (
297254 from_dataframe as pandas_from_dataframe
298255 )
299256 pandas_df = pandas_from_dataframe (table )
300257 result = pi .from_dataframe (pandas_df )
301258
302- assert table .equals (result )
259+ assert result ["weekday" ].to_pylist () == table ["weekday" ].to_pylist ()
260+ assert pa .types .is_dictionary (table ["weekday" ].type )
261+ assert pa .types .is_dictionary (result ["weekday" ].type )
262+ assert pa .types .is_string (table ["weekday" ].chunk (0 ).dictionary .type )
263+ assert pa .types .is_large_string (result ["weekday" ].chunk (0 ).dictionary .type )
264+ assert pa .types .is_int32 (table ["weekday" ].chunk (0 ).indices .type )
265+ assert pa .types .is_int8 (result ["weekday" ].chunk (0 ).indices .type )
303266
304267 table_protocol = table .__dataframe__ ()
305268 result_protocol = result .__dataframe__ ()
@@ -309,10 +272,25 @@ def test_roundtrip_pandas_boolean():
309272 assert table_protocol .num_chunks () == result_protocol .num_chunks ()
310273 assert table_protocol .column_names () == result_protocol .column_names ()
311274
275+ col_table = table_protocol .get_column (0 )
276+ col_result = result_protocol .get_column (0 )
277+
278+ assert col_result .dtype [0 ] == DtypeKind .CATEGORICAL
279+ assert col_result .dtype [0 ] == col_table .dtype [0 ]
280+ assert col_result .size () == col_table .size ()
281+ assert col_result .offset == col_table .offset
282+
283+ desc_cat_table = col_table .describe_categorical # source side: the original pyarrow table column
284+ desc_cat_result = col_result .describe_categorical # round-tripped side: compared against the source below
285+
286+ assert desc_cat_table ["is_ordered" ] == desc_cat_result ["is_ordered" ]
287+ assert desc_cat_table ["is_dictionary" ] == desc_cat_result ["is_dictionary" ]
288+ assert isinstance (desc_cat_result ["categories" ]._col , pa .Array )
289+
312290
313291@pytest .mark .pandas
314292@pytest .mark .parametrize ("unit" , ['s' , 'ms' , 'us' , 'ns' ])
315- def test_roundtrip_pandas_datetime (unit ):
293+ def test_pandas_roundtrip_datetime (unit ):
316294 if Version (pd .__version__ ) < Version ("1.5.0" ):
317295 pytest .skip ("__dataframe__ added to pandas in 1.5.0" )
318296 from datetime import datetime as dt
@@ -384,45 +362,6 @@ def test_pandas_to_pyarrow_float16_with_missing():
384362 pi .from_dataframe (df )
385363
386364
387- @pytest .mark .pandas
388- def test_pandas_to_pyarrow_string_with_missing ():
389- if Version (pd .__version__ ) < Version ("1.5.0" ):
390- pytest .skip ("__dataframe__ added to pandas in 1.5.0" )
391-
392- # pandas is using int64 offsets for string dtype so the constructed
393- # pyarrow string column will always be a large_string data type
394- arr = {
395- "Y" : ["a" , "b" , None ], # bool, ColumnNullType.USE_BYTEMASK,
396- }
397- df = pd .DataFrame (arr )
398- expected = pa .table (arr )
399- result = pi .from_dataframe (df )
400-
401- assert result [0 ].to_pylist () == expected [0 ].to_pylist ()
402- assert pa .types .is_string (expected [0 ].type )
403- assert pa .types .is_large_string (result [0 ].type )
404-
405-
406- @pytest .mark .pandas
407- def test_pandas_to_pyarrow_categorical_with_missing ():
408- if Version (pd .__version__ ) < Version ("1.5.0" ):
409- pytest .skip ("__dataframe__ added to pandas in 1.5.0" )
410-
411- arr = ["Mon" , "Tue" , "Mon" , "Wed" , "Mon" , "Thu" , "Fri" , "Sat" , None ]
412- df = pd .DataFrame (
413- {"weekday" : arr }
414- )
415- df = df .astype ("category" )
416- result = pi .from_dataframe (df )
417-
418- expected_dictionary = ["Fri" , "Mon" , "Sat" , "Thu" , "Tue" , "Wed" ]
419- expected_indices = pa .array ([1 , 4 , 1 , 5 , 1 , 3 , 0 , 2 , None ], type = pa .int8 ())
420-
421- assert result [0 ].to_pylist () == arr
422- assert result [0 ].chunk (0 ).dictionary .to_pylist () == expected_dictionary
423- assert result [0 ].chunk (0 ).indices .equals (expected_indices )
424-
425-
426365@pytest .mark .parametrize (
427366 "uint" , [pa .uint8 (), pa .uint16 (), pa .uint32 ()]
428367)
0 commit comments