-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[DF] Issues managing TClonesArray branches #9240
Copy link
Copy link
Closed
Description
- Checked for duplicates
Describe the bug
- The `GetSize()` result of the `TClonesArray` fed to the (re)defining function is always 1000, not reflecting the correct value.
- Redefining a variable that was of type `TClonesArray` causes the `GetColumnNames()` method to return a column name vector with repeated column names, and trying to snapshot the related columns causes fatal errors.
Expected behavior
- The `GetSize()` method of the input `TClonesArray` returns the correct size.
- `GetColumnNames()` produces unique column names.
- The redefined `TClonesArray` can be snapshotted correctly.
To Reproduce
- Place two ROOT macros in the same directory:
gen_tree.C
#include <Rtypes.h>
#include <TFile.h>
#include <TTree.h>
#include <TBranch.h>
#include <TClonesArray.h>
#include <TLorentzVector.h>
#include <string>
#include <cstdio>
#include <cstdlib>
#include <iostream>
// Draws one pseudo-random number: rand() divided by RAND_MAX in double
// precision, narrowed to Float_t on return.
Float_t gen_rand() {
  const Double_t denominator = static_cast<Double_t>(RAND_MAX);
  return rand() / denominator;
}
void gen_tree(const std::string pathFile = "file_tcatlv.root") {
TFile *tfOut = TFile::Open(pathFile.c_str(), "RECREATE");
tfOut->cd();
TTree *ttOut = new TTree("Events", "Tree of TClonesArrays of TLorentzVectors");
auto *tcaTest = new TClonesArray("TLorentzVector", 8);
size_t nVar;
ttOut->Branch("TestVar1", tcaTest);
ttOut->Branch("nTestVar1", &nVar);
constexpr size_t nEntries = 10;
for (size_t jEntry = 0; jEntry < nEntries; ++jEntry) {
tcaTest->Clear();
nVar = rand() % 8;
tcaTest->Expand(nVar);
std::cerr << "jEntry: " << jEntry << "\tnVar: " << nVar << "\tsize: " << tcaTest->GetSize() << std::endl;
for (size_t iVar = 0; iVar < nVar; ++iVar) {
const auto &pVar = new((*tcaTest)[iVar]) TLorentzVector(gen_rand(), gen_rand(), gen_rand(), gen_rand());
std::fprintf(stderr, "(%F, %F, %F, %F) ", pVar->Px(), pVar->Py(), pVar->Pz(), pVar->M());
}
std::cerr << std::endl;
ttOut->Fill();
}
ttOut->Write();
tfOut->Close();
}
// Entry point for a standalone build of gen_tree.C: forwards the first
// command-line argument, when present, as the output path; otherwise
// gen_tree() runs with its default path.
int main(int argc, char* argv[]) {
  const bool hasPath = argc > 1;
  if (!hasPath) {
    gen_tree();
    return 0;
  }
  gen_tree(argv[1]);
  return 0;
}
reproduce.C
#include <Rtypes.h>
#include <TFile.h>
#include <TTree.h>
#include <TBranch.h>
#include <TH1.h>
#include <TLorentzVector.h>
#include <ROOT/RDataFrame.hxx>
#include <Math/Vector4D.h>
#include <vector>
#include <string>
void reproduce(const std::string pathFileIn = "file_tcatlv.root", const std::string pathFileOut = "file_out.root") {
TFile *tfIn = TFile::Open(pathFileIn.c_str());
TTree *ttIn = tfIn->Get<TTree>("Events");
ROOT::RDataFrame dfIn(*ttIn);
std::cerr << "Defining TestVar2 from TestVar1: TLorentzVector -> LorentzVector";
auto df1 = dfIn.Define("TestVar2", [](const TClonesArray &tcaTest, const size_t n) {
const size_t nGot = tcaTest.GetSize();
std::cerr << "TestVar1.GetSize(): " << nGot << "\tn: " << n << std::endl;
ROOT::RVec<ROOT::Math::PtEtaPhiMVector> vTest(n, ROOT::Math::PtEtaPhiMVector());
for (size_t i = 0; i < n; ++i) {
const auto &tlv = *static_cast<TLorentzVector *>(tcaTest[i]);
vTest[i].SetPxPyPzE(tlv.Px(), tlv.Py(), tlv.Pz(), tlv.E());
}
return vTest;
}, { "TestVar1", "nTestVar1" });
std::cerr << "Redefining TestVar1: TLorentzVector -> LorentzVector" << std::endl;
auto df2 = df1.Redefine("TestVar1", [](const TClonesArray &tcaTest, const size_t n) {
const size_t nGot = tcaTest.GetSize();
std::cerr << "TestVar1.GetSize(): " << nGot << "\tn: " << n << std::endl;
ROOT::RVec<ROOT::Math::PtEtaPhiMVector> vTest(n, ROOT::Math::PtEtaPhiMVector());
for (size_t i = 0; i < n; ++i) {
const auto &tlv = *static_cast<TLorentzVector *>(tcaTest[i]);
vTest[i].SetPxPyPzE(tlv.Px(), tlv.Py(), tlv.Pz(), tlv.E());
}
return vTest;
}, { "TestVar1", "nTestVar1" });
std::cerr << "Creating a custom column TestVar2" << std::endl;
auto df3 = df2.Define("TestVar3", [](ROOT::RVec<ROOT::Math::PtEtaPhiMVector> TestVar2) {
return ROOT::VecOps::Map(TestVar2, [](ROOT::Math::PtEtaPhiMVector lv){return lv.Pt();});
}, {"TestVar2"});
std::cerr << "Columns: {" << std::flush;
for (const auto nameCol: df3.GetColumnNames()) {
std::cerr << nameCol << ", " << std::flush;
}
std::cerr << "}" << std::endl;
std::cerr << "Generating histogram hTest3 with TestVar3 in df3" << std::endl;
TFile *tfOut = TFile::Open(pathFileOut.c_str(), "RECREATE");
tfOut->cd();
df3.Histo1D({ "hTest3", "", 30, 0., 3. }, "TestVar3")->Write();
std::cerr << "Closing the output file" << std::endl;
tfOut->Close();
std::cerr << "Snapshotting df3 {TestVar3} as tree3 into the output file" << std::endl;
df3.Snapshot("tree3", pathFileOut, {"TestVar3"});
std::cerr << "Snapshotting df3 {TestVar2} as tree2 into the output file" << std::endl;
df3.Snapshot("tree2", pathFileOut, {"TestVar2"});
std::cerr << "Snapshotting df3 {TestVar1} as tree1 into the output file" << std::endl;
df3.Snapshot("tree1", pathFileOut, {"TestVar1"});
std::cerr << "Closing the input file" << std::endl;
tfIn->Close();
std::cerr << "Complete!" << std::endl;
}
// Entry point for a standalone build of reproduce.C.
//
// Usage: reproduce [input-file [output-file]]
// Arguments that are not given fall back to the defaults of reproduce().
int main(int argc, char* argv[]) {
  if (argc > 2) {
    reproduce(argv[1], argv[2]);
  } else if (argc > 1) {
    // Only the input path was given: argv[2] is a null pointer here and must
    // not be converted to std::string, so let the output path default.
    reproduce(argv[1]);
  } else {
    reproduce();
  }
  return 0;
}
-
Run:
$ root -l -q 'gen_tree.C++()'
$ root -l -q 'reproduce.C++()'
-
See outputs and errors.
- Duplicated column names
Columns: {TestVar2, TestVar1, TestVar3, TestVar1.fUniqueID, TestVar1.fBits, TestVar1.fP, TestVar1.fE, TestVar1, nTestVar1, }
- Incorrect `GetSize()`
Generating histogram hTest3 with TestVar3 in df3
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 1
TestVar1.GetSize(): 1000 n: 7
TestVar1.GetSize(): 1000 n: 0
- Snapshot error happens to `TestVar1`, but not `TestVar2` or `TestVar3`
  Snapshot error happens to both `TestVar1` and `TestVar2`, but not `TestVar3`
- Full outputs:
$ root -l -q 'gen_tree.C++()'
Processing gen_tree.C++()...
Info in <TUnixSystem::ACLiC>: creating shared library /run/media/root/data-btrfs/shamrock-shared/targets/Research_LL/ctau-proper/lxplus_HTcondor/preselect/redefine_custom_vector.root/./gen_tree_C.so
Warning in <TTree::Bronch>: Using split mode on a class: TLorentzVector with a custom Streamer
jEntry: 0 nVar: 5 size: 5
(0.780836, 0.994304, 0.974700, -1.584510) (0.888099, 0.785263, 0.905259, -1.305239) (0.862221, 0.008688, 0.320079, -0.430004) (0.508159, 0.227104, 0.549878, -0.729431) (0.994887, 0.949317, 0.458636, -1.254915)
jEntry: 1 nVar: 5 size: 5
(0.651535, 0.070535, 0.859432, -0.669713) (0.848072, 0.268093, 0.718520, -1.046939) (0.307848, 0.268393, 0.240499, -0.365170) (0.243093, 0.434697, 0.190251, -0.532397) (0.339955, 0.912194, 0.890578, -1.272495)
jEntry: 2 nVar: 5 size: 5
(0.703626, 0.856803, 0.459377, -1.173015) (0.139822, 0.454264, 0.202821, 0.301843) (0.179893, 0.588359, 0.249993, -0.615069) (0.447791, 0.098274, 0.098500, 0.540937) (0.816794, 0.558164, 0.023732, -0.985904)
jEntry: 3 nVar: 3 size: 3
(0.325884, 0.328280, 0.233360, -0.045052) (0.356636, 0.573315, 0.428329, -0.760415) (0.921959, 0.660601, 0.950840, -1.258582)
jEntry: 4 nVar: 0 size: 0
jEntry: 5 nVar: 1 size: 1
(0.918584, 0.574242, 0.407165, -1.035375)
jEntry: 6 nVar: 7 size: 7
(0.995525, 0.767034, 0.004563, -1.175969) (0.865308, 0.103064, 0.159200, -0.565209) (0.661228, 0.182932, 0.767290, -0.920826) (0.508816, 0.095570, 0.693251, -0.864872) (0.452207, 0.266567, 0.453281, 0.090425) (0.188525, 0.113883, 0.650261, 0.681392) (0.233861, 0.854605, 0.885871, -0.957485)
jEntry: 7 nVar: 5 size: 5
(0.402912, 0.562238, 0.257295, -0.718170) (0.566801, 0.700610, 0.035254, -0.603856) (0.859810, 0.717357, 0.331093, -1.166906) (0.484647, 0.790985, 0.551558, -0.910002) (0.484236, 0.576510, 0.032424, -0.064769)
jEntry: 8 nVar: 0 size: 0
jEntry: 9 nVar: 2 size: 2
(0.939328, 0.143674, 0.382106, -0.476480) (0.377535, 0.236711, 0.792486, -0.890021)
$ root -l -q 'reproduce.C++()'
Processing reproduce.C++()...
Info in <TUnixSystem::ACLiC>: creating shared library /run/media/root/data-btrfs/shamrock-shared/targets/Research_LL/ctau-proper/lxplus_HTcondor/preselect/redefine_custom_vector.root/./reproduce_C.so
Defining TestVar2 from TestVar1: TLorentzVector -> LorentzVectorRedefining TestVar1: TLorentzVector -> LorentzVector
Creating a custom column TestVar2
Columns: {TestVar2, TestVar1, TestVar3, TestVar1.fUniqueID, TestVar1.fBits, TestVar1.fP, TestVar1.fE, TestVar1, nTestVar1, }
Generating histogram hTest3 with TestVar3 in df3
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 3
TestVar1.GetSize(): 1000 n: 0
TestVar1.GetSize(): 1000 n: 1
TestVar1.GetSize(): 1000 n: 7
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 0
TestVar1.GetSize(): 1000 n: 2
Closing the output file
Snapshotting df3 {TestVar3} as tree3 into the output file
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 3
TestVar1.GetSize(): 1000 n: 0
TestVar1.GetSize(): 1000 n: 1
TestVar1.GetSize(): 1000 n: 7
TestVar1.GetSize(): 1000 n: 5
TestVar1.GetSize(): 1000 n: 0
TestVar1.GetSize(): 1000 n: 2
Snapshotting df3 {TestVar2} as tree2 into the output file
TestVar1.GetSize(): 1000 n: 5
Error in <TTree::Branch>: The class requested (ROOT::VecOps::RVec<ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double> > >) for the branch "TestVar2" is an instance of an stl collection and does not have a compiled CollectionProxy. Please generate the dictionary for this collection (ROOT::VecOps::RVec<ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double> > >) to avoid to write corrupted data.
RDataFrame::Run: event loop was interrupted
terminate called after throwing an instance of 'std::logic_error'
what(): Trying to insert a null branch address.
Setup
- ROOT version: 6.25/01, built against commit 24ea49d on the master branch.
ROOT Version: 6.25/01 Built for linuxx8664gcc on Jan 01 1980, 00:00:00 From @
- Operating system: NixOS
$ nix-info -m
- system: `"x86_64-linux"`
- host os: `Linux 5.10.73, NixOS, 21.05.20211019.8fe3b97 (Okapi)`
- multi-user?: `yes`
- sandbox: `yes`
- version: `nix-env (Nix) 2.4pre20211006_53e4794`
- nixpkgs: `/nix/var/nix/profiles/per-user/root/channels/nixos`
- How you obtained ROOT:
Custom build with the Nix package manager against Nixpkgs revision 60e709069fa2fbcfca943d8f6df8dc6d33c67025, a custom fork of the master branch of NixOS/Nixpkgs, with the ROOT source overridden. The gcc version is 10.3.0.
Additional context
Nix Flake setup to build ROOT:
flake.nix
# Flake that builds ROOT from the root-project master checkout on top of a
# forked nixpkgs, and exposes a development shell, a "run" wrapper script and
# the related packages.
{
inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-21.05";
inputs.nixpkgs-root.url = "github:ShamrockLee/nixpkgs/root-6-25";
inputs.flake-utils.url = "github:numtide/flake-utils";
inputs.root-source.url = "github:root-project/root/master";
# The ROOT checkout is used as plain source, not as a flake.
inputs.root-source.flake = false;
outputs = inputs@{self, nixpkgs, nixpkgs-root, flake-utils, root-source, ...}: flake-utils.lib.eachDefaultSystem (system: let
pkgs = nixpkgs.legacyPackages.${system};
# Package set from the forked nixpkgs with the ROOT package overridden.
pkgs-root = import nixpkgs-root {
inherit system;
overlays = [
(final: prev: {
# Replace the ROOT source with the root-source input, switch the imt and
# ssl CMake flags from OFF to ON, and add the libraries they need.
root = prev.root.overrideAttrs (oldAttrs: {
version = "2021-09-01";
src = root-source;
cmakeFlags = (map (oldFlag:
if oldFlag == "-Dimt=OFF" then "-Dimt=ON"
else if oldFlag == "-Dssl=OFF" then "-Dssl=ON"
# else if oldFlag == "-Dgfal=OFF" then "-Dgfal=ON"
# else if oldFlag == "-Dxrootd=OFF" then "-Dxrootd=ON"
else oldFlag
) oldAttrs.cmakeFlags) ++ [
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
];
buildInputs = oldAttrs.buildInputs ++ (with pkgs-root; [
tbb # for implicit multithreading
openssl # for ssl support
]);
});
})
];
};
# Development shell with ROOT plus the compiler and build tools.
devShell = pkgs.mkShell {
buildInputs = (with pkgs-root; [
root
]);
nativeBuildInputs = (with pkgs-root; [
gcc
gnumake
cmake
]) ++ (with pkgs;[
gawk
gitAndTools.gitFull
]);
};
# Packages shared between the "run" wrapper and the flake's package outputs.
packagesSub = {
inherit (pkgs-root) root gcc gnumake cmake;
inherit (pkgs) gawk;
inherit (pkgs.gitAndTools) git gitFull;
};
# Wrapper script: prepends the tool binaries to PATH, then executes its arguments.
# NOTE(review): devShell.shellHook is interpolated and then sourced with "." as
# if it were a file path - confirm this is intended.
run = pkgs.writeShellScriptBin "run" ''
export PATH="${ with packagesSub; pkgs.lib.makeBinPath [ root gcc gnumake cmake gawk gitFull ]}:$PATH"
if test -n "${devShell.shellHook}"; then
. "${devShell.shellHook}";
fi
exec "$@"
'';
# Package defined in ./ana.nix, built against the overridden ROOT.
ana = pkgs.callPackage ./ana.nix { inherit (packagesSub) root; };
in{
legacyPackages = pkgs;
legacyPackages-root = pkgs-root;
inherit devShell;
defaultPackage = run;
packages = packagesSub // {
srcRaw = self;
inherit run ana;
};
});
}
flake.lock
{
"nodes": {
"flake-utils": {
"locked": {
"lastModified": 1623875721,
"narHash": "sha256-A8BU7bjS5GirpAUv4QA+QnJ4CceLHkcXdRp4xITDB0s=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "f7e004a55b120c02ecb6219596820fcd32ca8772",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1628696776,
"narHash": "sha256-K8+/IQNZDZltIu/KSf+Tz01OwNzTGgTch0ktMOWcxLg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "927ce1afc1db40869a463a37ea2738c27d425f80",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-21.05",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs-root": {
"locked": {
"lastModified": 1628672385,
"narHash": "sha256-Xb/yIAo1OK8OEwrtd5SP3bjAD06PLXsrWG3WTLKqme0=",
"owner": "ShamrockLee",
"repo": "nixpkgs",
"rev": "60e709069fa2fbcfca943d8f6df8dc6d33c67025",
"type": "github"
},
"original": {
"owner": "ShamrockLee",
"ref": "root-6-25",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"nixpkgs-root": "nixpkgs-root",
"root-source": "root-source"
}
},
"root-source": {
"flake": false,
"locked": {
"lastModified": 1630489941,
"narHash": "sha256-HCUYlIz479asnYAD9wwKtFXJILKn/PrbT4mro6DoRMo=",
"owner": "root-project",
"repo": "root",
"rev": "24ea49d4702ba0aa9b1ea5521950520154ee151c",
"type": "github"
},
"original": {
"owner": "root-project",
"ref": "master",
"repo": "root",
"type": "github"
}
}
},
"root": "root",
"version": 7
}
Reactions are currently unavailable