-
-
Notifications
You must be signed in to change notification settings - Fork 750
Closed
Description
I get a timeout error when trying to start a LocalCluster with 4 or more worker processes:
cluster = LocalCluster(processes=True, n_workers=4, death_timeout=None)#, silence_logs=logging.DEBUG)
client = Client(cluster)
I'm using dask 1.25.1 with Python 2.7 on macOS.
This also happens during tests.
I modified the dask.distributed test test_procs, found in distributed/deploy/tests/test_local.py,
like this:
def test_procs():
with LocalCluster(4, scheduler_port=0, processes=True, threads_per_worker=3,
diagnostics_port=None, silence_logs=False) as c:
assert len(c.workers) == 2
assert all(isinstance(w, Nanny) for w in c.workers)
with Client(c.scheduler.address) as e:
assert all(v == 3 for v in e.ncores().values())
c.start_worker()
assert all(isinstance(w, Nanny) for w in c.workers)
repr(c)
I set n_workers to 4 instead of 2,
and I get this error:
test_local.py:68:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../local.py:141: in __init__
self.start(ip=ip, n_workers=n_workers)
../local.py:174: in start
self.sync(self._start, **kwargs)
../local.py:167: in sync
return sync(self.loop, func, *args, **kwargs)
../../utils.py:277: in sync
six.reraise(*error[0])
../../utils.py:262: in f
result[0] = yield future
../../../../../anaconda/lib/python2.7/site-packages/tornado/gen.py:1133: in run
value = future.result()
../../../../../anaconda/lib/python2.7/site-packages/tornado/concurrent.py:269: in result
raise_exc_info(self._exc_info)
../../../../../anaconda/lib/python2.7/site-packages/tornado/gen.py:1141: in run
yielded = self.gen.throw(*exc_info)
../local.py:194: in _start
yield [self._start_worker(**self.worker_kwargs) for i in range(n_workers)]
../../../../../anaconda/lib/python2.7/site-packages/tornado/gen.py:1133: in run
value = future.result()
../../../../../anaconda/lib/python2.7/site-packages/tornado/concurrent.py:269: in result
raise_exc_info(self._exc_info)
../../../../../anaconda/lib/python2.7/site-packages/tornado/gen.py:883: in callback
result_list.append(f.result())
../../../../../anaconda/lib/python2.7/site-packages/tornado/concurrent.py:269: in result
raise_exc_info(self._exc_info)
../../../../../anaconda/lib/python2.7/site-packages/tornado/gen.py:1147: in run
yielded = self.gen.send(value)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = LocalCluster('tcp://127.0.0.1:55398', workers=3, ncores=9)
death_timeout = 60, kwargs = {'ncores': 3, 'quiet': True, 'services': {}}
W = <class 'distributed.nanny.Nanny'>, w = <Nanny: None, threads: 3>
@gen.coroutine
def _start_worker(self, death_timeout=60, **kwargs):
if self.status and self.status.startswith('clos'):
warnings.warn("Tried to start a worker while status=='%s'" % self.status)
return
if self.processes:
W = Nanny
kwargs['quiet'] = True
else:
W = Worker
w = W(self.scheduler.address, loop=self.loop,
death_timeout=death_timeout,
silence_logs=self.silence_logs, **kwargs)
yield w._start()
self.workers.append(w)
while w.status != 'closed' and w.worker_address not in self.scheduler.workers:
yield gen.sleep(0.01)
if w.status == 'closed' and self.scheduler.status == 'running':
self.workers.remove(w)
> raise gen.TimeoutError("Worker failed to start")
E TimeoutError: Worker failed to start
../local.py:224: TimeoutError
Metadata
Metadata
Assignees
Labels
No labels