-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Integer overflow in TEntryList #11026
Description
Describe the bug
With the following reproducer
#include <TChain.h>
#include <TEntryList.h>
#include <RtypesCore.h>
#include <ROOT/RDataFrame.hxx>
#include <TTreeReader.h>
#include <iostream>
#include <limits>
#include <string>
auto LIMIT = std::numeric_limits<int>::max();
Long64_t OVERLIMIT = 2147483648; // LIMIT + 1
ULong64_t NENTRIES = LIMIT / 100;
void write_tree()
{
ROOT::RDataFrame df{NENTRIES};
df.Define("x", "rdfentry_").Snapshot<ULong64_t>("tree", "file.root", {"x"});
}
void repro()
{
TChain chain{"tree"};
TEntryList elists{};
for (int i = 0; i < 101; i++){
chain.Add("file.root?#tree", NENTRIES);
TEntryList sublist{};
sublist.SetTreeName("tree");
sublist.SetFileName("file.root");
sublist.EnterRange(0, NENTRIES);
elists.AddSubList(&sublist);
}
chain.SetCacheEntryRange(0, OVERLIMIT);
chain.SetEntryList(&elists, "sync"); // This sets the tree index in the sublist
std::cout << "std::numeric_limits<int>::max(): " << LIMIT << "\n";
std::cout << "Total entries in chain: " << chain.GetEntries() << "\n";
std::cout << "Total entries in entrylist: " << elists.GetN() << "\n";
int treenum_1 = -1;
std::cout << "Giving input entry " << LIMIT << "\n";
auto eindex_1 = elists.GetEntryAndTree(LIMIT, treenum_1);
std::cout << "Got tree index " << treenum_1 << " and entry index " << eindex_1 << "\n";
int treenum_2 = -1;
std::cout << "Giving input entry " << OVERLIMIT << "\n";
auto eindex_2 = elists.GetEntryAndTree(OVERLIMIT, treenum_2);
std::cout << "Got tree index " << treenum_2 << " and entry index " << eindex_2 << "\n";
TTreeReader r{&chain, chain.GetEntryList()};
r.SetEntry(OVERLIMIT);
std::string status = (r.GetEntryStatus() == TTreeReader::EEntryStatus::kEntryBeyondEnd) ? "kEntryBeyondEnd" : "not kEntryBeyondEnd";
std::cout << "What is the entry status? " << status << "\n";
}
int main()
{
write_tree();
repro();
}And the following small diff in a couple classes to print debug statements
--- a/tree/tree/src/TEntryList.cxx
+++ b/tree/tree/src/TEntryList.cxx
@@ -155,6 +155,8 @@ See comments to those functions for more details
#include "TSystem.h"
#include "TObjString.h"
+#include <iostream>
+
ClassImp(TEntryList);
////////////////////////////////////////////////////////////////////////////////
@@ -839,6 +841,7 @@ Long64_t TEntryList::GetEntryAndTree(Int_t index, Int_t &treenum)
//Then, when GetEntryAndTree(21, treenum, kTRUE) is called, first entry of the
//third sublist will be returned
+ std::cout << "Got input index: " << index << "\n";
--- a/tree/treeplayer/src/TTreeReader.cxx
+++ b/tree/treeplayer/src/TTreeReader.cxx
@@ -18,6 +18,7 @@
#include "TTreeReaderValue.h"
#include "TFriendProxy.h"
+#include <iostream>
// clang-format off
/**
@@ -547,6 +548,7 @@ TTreeReader::EEntryStatus TTreeReader::SetEntryBase(Long64_t entry, Bool_t local
const Long64_t loadResult = treeToCallLoadOn->LoadTree(entryAfterList);
fSetEntryBaseCallingLoadTree = kFALSE;
+ std::cout << "With entryAfterList=" << entryAfterList <<" got loadResult=" << loadResult << "\n";
I get the following output
$: ./repro.o
std::numeric_limits<int>::max(): 2147483647
Total entries in chain: 2168958436
Total entries in entrylist: 2168958436
Giving input entry 2147483647
Got input index: 2147483647
Got tree index 100 and entry index 47
Giving input entry 2147483648
Got input index: -2147483648
Got tree index -1 and entry index -1
Got input index: -2147483648
With entryAfterList=1616 got loadResult=1616
What is the entry status? not kEntryBeyondEnd
Things to notice
The total number of entries in the chain/tentrylist is larget than std::numeric_limits<int>::max(). With the first call to GetEntryAndTree, I give as input exactly the int limit, and the output is correct (index of the tree = 100, index of the entry in that tree = 47). With the next call, the input is the int limit + 1, from which one would expect the same tree and just one entry beyond the previous one, i.e. 48.
Instead the result is tree index = -1 and entry index = -1
This is a bug resulting from a wrong signature in TEntryList::GetEntryAndTree:
root/tree/tree/src/TEntryList.cxx
Line 831 in 8323e50
| Long64_t TEntryList::GetEntryAndTree(Int_t index, Int_t &treenum) |
Which should accept a Long64_t like all other places in the class where an entry number is expected, but instead takes an Int_t.
This results in a further bug in TTreeReader, that calls this method at
root/tree/treeplayer/src/TTreeReader.cxx
Line 532 in 8323e50
| entryAfterList = fEntryList->GetEntryAndTree(entry, treenum); |
When the wrong entry index / tree index is returned, the consecutive call to treeToCallLoadOn->LoadTree(entryAfterList) will give result -2, like shown in the output above.
This will then set the entry status to kEntryBeyondEnd at
root/tree/treeplayer/src/TTreeReader.cxx
Lines 571 to 580 in 8323e50
| if (loadResult == -2) { | |
| fDirector->Notify(); | |
| if (fProxiesSet) { | |
| for (auto value: fValues) { | |
| value->NotifyNewTree(fTree->GetTree()); | |
| } | |
| } | |
| fEntryStatus = kEntryBeyondEnd; | |
| return fEntryStatus; | |
| } |
Expected behavior
The TEntryList is able to give the correct tree and entry index even after std::numeric_limits<int>::max().
Setup
ROOT master
GCC 12
Fedora 36