-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Closed
Labels
Description
In a recent Travis CI test run I see the following:
dask/dataframe/tests/test_io.py::test_usecols PASSED
dask/dataframe/tests/test_io.py::test_dummy_from_array PASSED
dask/dataframe/tests/test_io.py::test_dummy_from_1darray /home/travis/build.sh: line 45: 2972 Segmentation fault (core dumped) py.test dask --verbose
This likely means that something within Pandas is not responding well to running in threads.
Relevant functions
def test_dummy_from_1darray():
    """Exercise ``dd.io._dummy_from_array`` on 1-D input for each ``columns`` form."""
    # columns omitted -> Series carrying the array's dtype
    float_arr = np.array([1., 2., 3.], dtype=np.float64)
    meta = dd.io._dummy_from_array(float_arr)
    assert isinstance(meta, pd.Series)
    assert meta.dtype == np.float64

    # The remaining cases all use the same object-dtype input.
    obj_arr = np.array([1, 2, 3], dtype=np.object_)

    # scalar columns -> Series named after that scalar
    meta = dd.io._dummy_from_array(obj_arr, columns='x')
    assert isinstance(meta, pd.Series)
    assert meta.name == 'x'
    assert meta.dtype == np.object_

    # one-element list of columns -> single-column DataFrame
    meta = dd.io._dummy_from_array(obj_arr, columns=['x'])
    assert isinstance(meta, pd.DataFrame)
    assert meta['x'].dtype == np.object_
    tm.assert_index_equal(meta.columns, pd.Index(['x']))

    # more labels than columns -> the pandas length-mismatch error surfaces
    msg = r"""Length mismatch: Expected axis has 1 elements, new values have 2 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        dd.io._dummy_from_array(obj_arr, columns=['a', 'b'])
def _dummy_from_array(x, columns=None):
    """Build a zero-length pandas object whose dtype(s) mirror those of ``x``.

    Parameters
    ----------
    x : array
        Array with at most two dimensions.  If ``x.dtype`` has named fields,
        each field becomes a column with that field's dtype.
    columns : scalar, sequence or None
        For a 1-D array without named fields, a scalar (or ``None``) is used
        as the name of the returned Series.  In every other case, a
        non-``None`` value is assigned as the column labels of the returned
        DataFrame.

    Returns
    -------
    pd.Series or pd.DataFrame
        An empty Series for 1-D input without named fields and scalar/``None``
        ``columns``; otherwise an empty DataFrame.

    Raises
    ------
    ValueError
        If ``x`` has more than two dimensions.  Invalid ``columns`` are not
        checked here; whatever pandas raises on assignment propagates.
    """
    if x.ndim > 2:
        raise ValueError('from_array does not input more than 2D array, got'
                         ' array with shape %r' % (x.shape,))

    field_names = getattr(x.dtype, 'names', None)
    if field_names is not None:
        # Record array: one column per named field.
        labels = tuple(field_names)
        kinds = [x.dtype.fields[name][0] for name in field_names]
    elif x.ndim == 1 and (columns is None or np.isscalar(columns)):
        # Plain 1-D input with at most a single name maps onto a Series.
        return pd.Series([], name=columns, dtype=x.dtype)
    else:
        # Positional integer labels; anything that is not 2-D gets one column.
        labels = list(range(x.shape[1])) if x.ndim == 2 else [0]
        kinds = [x.dtype] * len(labels)

    empty_columns = {label: np.array([], dtype=kind)
                     for label, kind in zip(labels, kinds)}
    frame = pd.DataFrame(empty_columns, columns=labels)
    if columns is not None:
        # Let pandas validate the labels and raise if they do not fit.
        frame.columns = columns
    return frame