You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: python/pyspark/rddsampler.py
+16 -10. Lines changed: 16 additions & 10 deletions
Original file line number
Diff line number
Diff line change
@@ -17,6 +17,7 @@
17
17
18
18
importsys
19
19
importrandom
20
+
importmath
20
21
21
22
22
23
classRDDSamplerBase(object):
@@ -37,16 +38,21 @@ def getUniformSample(self):
37
38
returnself._random.random()
38
39
39
40
defgetPoissonSample(self, mean):
40
-
# here we simulate drawing numbers n_i ~ Poisson(lambda = 1/mean) by
41
-
# drawing a sequence of numbers delta_j ~ Exp(mean)
42
-
num_arrivals=0
43
-
cur_time=self._random.expovariate(mean)
44
-
45
-
whilecur_time<1.0:
46
-
cur_time+=self._random.expovariate(mean)
47
-
num_arrivals+=1
48
-
49
-
returnnum_arrivals
41
+
# Using Knuth's algorithm described in http://en.wikipedia.org/wiki/Poisson_distribution
42
+
ifmean<20.0: # one exp and k+1 random calls
43
+
l=math.exp(-mean)
44
+
p=self._random.random()
45
+
k=0
46
+
whilep>l:
47
+
k+=1
48
+
p*=self._random.random()
49
+
else: # switch to the log domain, k+1 expovariate (random + log) calls
0 commit comments