Skip to content

Commit edc1d78

Browse files
committed
[DF] Move from namedtuples to classes in Ranges.py
1 parent 14c298e commit edc1d78

File tree

1 file changed

+90
-11
lines changed
  • bindings/experimental/distrdf/python/DistRDF

1 file changed

+90
-11
lines changed

bindings/experimental/distrdf/python/DistRDF/Ranges.py

Lines changed: 90 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,90 @@
55

66
logger = logging.getLogger(__name__)
77

8-
EmptySourceRange = collections.namedtuple("EmptySourceRange", ["start", "end"])
9-
TreeRange = collections.namedtuple("TreeRange", ["start", "end", "filelist", "friend_info"])
8+
class EmptySourceRange(object):
    """
    Range of entries of an empty source.

    Attributes:
        start (int): Entry at which this range starts.

        end (int): Entry at which this range ends.
    """

    def __init__(self, start, end):
        """Store the boundaries of the range."""
        self.start, self.end = start, end
22+
23+
class TreeRange(object):
    """
    Range of TTree entries. Entries are global with respect to the chain
    built from the input files of the dataset.

    Attributes:
        start (int): Entry at which this range starts.

        end (int): Entry at which this range ends.

        filelist (list[str]): Input files of the dataset.

        friend_info (DistRDF.HeadNode.FriendInfo): Information about friend
            trees.
    """

    def __init__(self, start, end, filelist, friend_info):
        """Store the range boundaries, the input files and the friend info."""
        self.start, self.end = start, end
        self.filelist, self.friend_info = filelist, friend_info
44+
45+
46+
class FileAndIndex(object):
    """
    This is a pair (filename, fileindex) that represents the index of the
    current filename in the list of input files of the dataset.

    Instances compare equal and hash by value, i.e. by the
    (filename, fileindex) pair. `get_clusters` builds a new instance for every
    cluster and `get_clustered_ranges` de-duplicates them through a `set`, so
    identity-based hashing would keep one entry per cluster instead of one
    entry per file.

    Attributes:
        filename (str): The name of the file.

        fileindex (int): The index of the file in the list of input files.
    """

    def __init__(self, filename, fileindex):
        """set attributes"""
        self.filename = filename
        self.fileindex = fileindex

    def __eq__(self, other):
        """Two pairs are equal when both filename and fileindex match."""
        if not isinstance(other, FileAndIndex):
            return NotImplemented
        return (self.filename, self.fileindex) == (other.filename, other.fileindex)

    def __ne__(self, other):
        """Explicit inverse of `__eq__` (Python 2 does not derive it)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash consistently with `__eq__` so that sets and dicts de-duplicate."""
        return hash((self.filename, self.fileindex))
61+
62+
class ChainCluster(object):
    """
    Descriptor of a cluster of entries in a TChain. Uses global entries rather
    than local.

    Attributes:
        start (int): The starting global entry of this cluster in the chain.

        end (int): The ending global entry of this cluster in the chain.

        offset (int): The offset of this cluster in the chain. That is, the
            starting entry of the file this cluster belongs to in the chain.

        filetuple (FileAndIndex): A pair with the name of the file this cluster
            belongs to and the index of that file in the chain.
    """

    def __init__(self, start, end, offset, filetuple):
        """set attributes"""
        self.start = start
        self.end = end
        self.offset = offset
        self.filetuple = filetuple

    def __lt__(self, other):
        """
        In `get_clustered_ranges` we need to retrieve the minimum and maximum
        entries in a certain list of clusters.

        Clusters are ordered lexicographically by (start, end). Requiring
        both `start` and `end` to be smaller would leave clusters that share
        one boundary incomparable, making the result of `min`/`max` depend on
        the order of the list.
        """
        return (self.start, self.end) < (other.start, other.end)
1092

1193

1294
def _n_even_chunks(iterable, n_chunks):
@@ -79,10 +161,7 @@ def get_clusters(treename, filelist):
79161
"""
80162

81163
clusters = []
82-
cluster = collections.namedtuple(
83-
"cluster", ["start", "end", "offset", "filetuple"])
84-
fileandindex = collections.namedtuple("fileandindex",
85-
["filename", "index"])
164+
86165
offset = 0
87166
fileindex = 0
88167

@@ -97,8 +176,8 @@ def get_clusters(treename, filelist):
97176

98177
while start < entries:
99178
end = it()
100-
clusters.append(cluster(start + offset, end + offset, offset,
101-
fileandindex(filename, fileindex)))
179+
clusters.append(ChainCluster(start + offset, end + offset, offset,
180+
FileAndIndex(filename, fileindex)))
102181
start = end
103182

104183
fileindex += 1
@@ -249,13 +328,13 @@ def get_clustered_ranges(clustersinfiles, npartitions, treename, friend_info):
249328
"""
250329
clustered_ranges = [
251330
TreeRange(
252-
min(clusters)[0] - clusters[0].offset, # type: int
253-
max(clusters)[1] - clusters[0].offset, # type: int
331+
min(clusters).start - clusters[0].offset, # type: int
332+
max(clusters).end - clusters[0].offset, # type: int
254333
[
255334
filetuple.filename
256335
for filetuple in sorted(set([
257336
cluster.filetuple for cluster in clusters
258-
]), key=lambda curtuple: curtuple[1])
337+
]), key=lambda curtuple: curtuple.fileindex)
259338
], # type: list[str]
260339
friend_info # type: DistRDF.HeadNode.FriendInfo
261340
) # type: collections.namedtuple

0 commit comments

Comments
 (0)