-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Closed
Description
# gen_garbage_csv.py
import numpy as np
import pandas as pd
NCOLS = 9
NROWS = 2e5
df = pd.DataFrame(np.random.randn(NROWS, NCOLS))
df.to_csv('./garbage.csv', index=False)

# test_garbage.py
import dask.dataframe as dd
print(dd.read_csv('./garbage.csv', header=0).count().compute())

When NCOLS = 8, the garbage.csv file is ~30M, and test_garbage.py runs successfully.
When NCOLS = 9, garbage.csv is ~34M, and python test_garbage.py segfaults.
This bug only manifests when using dask (0.8.1 or master) with Python 3.4.
With Python 3.5, I see no segfaults.
In case it's helpful, here's the error dump before segfaulting.
Fatal Python error: GC object already tracked
Thread 0x0000000115ee1000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 429 in _handle_results
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001159de000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 376 in _handle_tasks
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001154db000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 367 in _handle_workers
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x0000000114fd8000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x0000000114ad5000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001145d2000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001140cf000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x0000000113bcc000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001136c9000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 108 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00000001131c6000 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/site-packages/pandas/io/parsers.py", line 1197 in read
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/site-packages/pandas/io/parsers.py", line 747 in read
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/site-packages/pandas/io/parsers.py", line 285 in _read
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/site-packages/pandas/io/parsers.py", line 498 in parser_f
File "/Users/ksmith/work/dask/dask/dataframe/io.py", line 53 in _read_csv
File "/Users/ksmith/work/dask/dask/async.py", line 246 in _execute_task
File "/Users/ksmith/work/dask/dask/async.py", line 264 in execute_task
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 119 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Current thread 0x0000000112cc3000 (most recent call first):
File "/Users/ksmith/work/dask/dask/utils.py", line 276 in textblock
File "/Users/ksmith/work/dask/dask/dataframe/io.py", line 51 in _read_csv
File "/Users/ksmith/work/dask/dask/async.py", line 246 in _execute_task
File "/Users/ksmith/work/dask/dask/async.py", line 264 in execute_task
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/multiprocessing/pool.py", line 119 in worker
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 859 in run
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 911 in _bootstrap_inner
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 879 in _bootstrap
Thread 0x00007fff7dd11300 (most recent call first):
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/threading.py", line 290 in wait
File "/Users/ksmith/anaconda/envs/blaze-3.4/lib/python3.4/queue.py", line 167 in get
File "/Users/ksmith/work/dask/dask/async.py", line 467 in get_async
File "/Users/ksmith/work/dask/dask/threaded.py", line 57 in get
File "/Users/ksmith/work/dask/dask/base.py", line 110 in compute
File "/Users/ksmith/work/dask/dask/base.py", line 37 in compute
File "test_garbage.py", line 3 in <module>
[1] 22095 abort python test_garbage.py
Metadata
Metadata
Assignees
Labels
No labels