Skip to content

[DF] Issues managing TClonesArray branches #9240

@ShamrockLee

Description

@ShamrockLee
  • Checked for duplicates

Describe the bug

  • The GetSize() result of the TClonesArray passed to the (re)defining function is always 1000 and does not reflect the actual number of elements.
  • Redefining a column whose type was TClonesArray causes the GetColumnNames() method to return a vector containing repeated column names, and trying to snapshot the affected columns causes fatal errors.

Expected behavior

  • The GetSize() method of the input TClonesArray returns the correct size.
  • GetColumnNames() produces unique column names.
  • The redefined TClonesArray can be snapshot correctly.

To Reproduce

  1. Place two ROOT macros in the same directory:

gen_tree.C

#include <Rtypes.h>
#include <TFile.h>
#include <TTree.h>
#include <TBranch.h>
#include <TClonesArray.h>
#include <TLorentzVector.h>

#include <string>
#include <cstdio>
#include <cstdlib>
#include <iostream>

/// Draw one pseudo-random number by scaling a single rand() call by RAND_MAX.
/// The quotient is computed in Double_t and converted to Float_t on return.
Float_t gen_rand() {
	const Double_t scale = static_cast<Double_t>(RAND_MAX);
	return rand() / scale;
}

/// Write the input file used by the reproducer.
///
/// Creates (or overwrites) `pathFile` and stores a tree named "Events" with
/// 10 entries. Each entry carries two branches:
///   - "TestVar1":  a TClonesArray of TLorentzVector
///   - "nTestVar1": the number of vectors filled for that entry (rand() % 8)
/// The per-entry count, the array size and every generated vector are echoed
/// to stderr.
///
/// @param pathFile  Path of the ROOT file to (re)create.
void gen_tree(const std::string pathFile = "file_tcatlv.root") {
	// NOTE(review): the result of TFile::Open is used without a null check.
	TFile *tfOut = TFile::Open(pathFile.c_str(), "RECREATE");
	tfOut->cd();
	TTree *ttOut = new TTree("Events", "Tree of TClonesArrays of TLorentzVectors");
	// Constructed with an initial size argument of 8.
	auto *tcaTest = new TClonesArray("TLorentzVector", 8);
	// Assigned at the top of every loop iteration before it is read.
	size_t nVar;
	ttOut->Branch("TestVar1", tcaTest);
	ttOut->Branch("nTestVar1", &nVar);
	constexpr size_t nEntries = 10;
	for (size_t jEntry = 0; jEntry < nEntries; ++jEntry) {
		tcaTest->Clear();
		// rand() is not seeded anywhere in this block.
		nVar = rand() % 8;
		// Resize the array to this entry's element count before filling it.
		tcaTest->Expand(nVar);
		std::cerr << "jEntry: " << jEntry << "\tnVar: " << nVar << "\tsize: " << tcaTest->GetSize() << std::endl;
		for (size_t iVar = 0; iVar < nVar; ++iVar) {
			// Placement-new constructs the vector at the address yielded by slot iVar.
			// The four gen_rand() calls are function arguments, so their relative
			// evaluation order is chosen by the compiler.
			const auto &pVar = new((*tcaTest)[iVar]) TLorentzVector(gen_rand(), gen_rand(), gen_rand(), gen_rand());
			// The fourth printed value is M(), not the fourth constructor argument.
			std::fprintf(stderr, "(%F, %F, %F, %F) ", pVar->Px(), pVar->Py(), pVar->Pz(), pVar->M());
		}
		std::cerr << std::endl;
		ttOut->Fill();
	}
	ttOut->Write();
	tfOut->Close();
	// NOTE(review): tcaTest and tfOut are never deleted in this function.
}

/// Entry point for a standalone build: forwards the first command-line
/// argument (if any) to gen_tree as the output path, otherwise relies on
/// gen_tree's default path.
int main(int argc, char* argv[]) {
	const bool hasPathArg = argc > 1;
	if (!hasPathArg) {
		gen_tree();
		return 0;
	}
	gen_tree(argv[1]);
	return 0;
}

reproduce.C

#include <Rtypes.h>
#include <TFile.h>
#include <TTree.h>
#include <TBranch.h>
#include <TH1.h>
#include <TLorentzVector.h>
#include <ROOT/RDataFrame.hxx>
#include <Math/Vector4D.h>

#include <vector>
#include <string>

/// Reproducer for the reported RDataFrame / TClonesArray problems.
///
/// Reads the tree "Events" from `pathFileIn`, then on top of it:
///   1. defines   "TestVar2" from the TClonesArray column "TestVar1",
///   2. redefines "TestVar1" with the same conversion,
///   3. defines   "TestVar3" as the Pt() of every element of "TestVar2",
/// prints the resulting column names, writes a histogram of "TestVar3" to
/// `pathFileOut`, and finally snapshots "TestVar3", "TestVar2" and "TestVar1"
/// (in that order) into `pathFileOut`. Progress messages go to stderr.
///
/// @param pathFileIn   Input ROOT file containing the "Events" tree.
/// @param pathFileOut  Output ROOT file for the histogram and the snapshots.
void reproduce(const std::string pathFileIn = "file_tcatlv.root", const std::string pathFileOut = "file_out.root") {
	// NOTE(review): neither the opened file nor the fetched tree is checked for null.
	TFile *tfIn = TFile::Open(pathFileIn.c_str());
	TTree *ttIn = tfIn->Get<TTree>("Events");
	ROOT::RDataFrame dfIn(*ttIn);
	// NOTE(review): unlike the other progress messages, this one has no trailing
	// std::endl, so the next message is printed on the same line.
	std::cerr << "Defining TestVar2 from TestVar1: TLorentzVector -> LorentzVector";
	auto df1 = dfIn.Define("TestVar2", [](const TClonesArray &tcaTest, const size_t n) {
		// NOTE(review): GetSize() presumably reports the array's allocated capacity
		// rather than the number of filled slots (GetEntries()/GetEntriesFast()
		// may be the intended call) — confirm against the TObjArray documentation.
		const size_t nGot = tcaTest.GetSize();
		std::cerr << "TestVar1.GetSize(): " << nGot << "\tn: " << n <<  std::endl;
		ROOT::RVec<ROOT::Math::PtEtaPhiMVector> vTest(n, ROOT::Math::PtEtaPhiMVector());
		// The loop bound is the "nTestVar1" column value, not the array's own count.
		for (size_t i = 0; i < n; ++i) {
			const auto &tlv = *static_cast<TLorentzVector *>(tcaTest[i]);
			vTest[i].SetPxPyPzE(tlv.Px(), tlv.Py(), tlv.Pz(), tlv.E());
		}
		return vTest;
	}, { "TestVar1", "nTestVar1" });
	std::cerr << "Redefining TestVar1: TLorentzVector -> LorentzVector" << std::endl;
	// Same conversion lambda body as above, now replacing the input column itself.
	auto df2 = df1.Redefine("TestVar1", [](const TClonesArray &tcaTest, const size_t n) {
		const size_t nGot = tcaTest.GetSize();
		std::cerr << "TestVar1.GetSize(): " << nGot << "\tn: " << n <<  std::endl;
		ROOT::RVec<ROOT::Math::PtEtaPhiMVector> vTest(n, ROOT::Math::PtEtaPhiMVector());
		for (size_t i = 0; i < n; ++i) {
			const auto &tlv = *static_cast<TLorentzVector *>(tcaTest[i]);
			vTest[i].SetPxPyPzE(tlv.Px(), tlv.Py(), tlv.Pz(), tlv.E());
		}
		return vTest;
	}, { "TestVar1", "nTestVar1" });
	// NOTE(review): the message says "TestVar2", but the column defined next is "TestVar3".
	std::cerr << "Creating a custom column TestVar2" << std::endl;
	auto df3 = df2.Define("TestVar3", [](ROOT::RVec<ROOT::Math::PtEtaPhiMVector> TestVar2) {
		return ROOT::VecOps::Map(TestVar2, [](ROOT::Math::PtEtaPhiMVector lv){return lv.Pt();});
	}, {"TestVar2"});
	// Dump every column name known to df3, comma-separated, inside braces.
	std::cerr << "Columns: {" << std::flush;
	for (const auto nameCol: df3.GetColumnNames()) {
		std::cerr << nameCol << ", " << std::flush;
	}
	std::cerr << "}" << std::endl;
	std::cerr << "Generating histogram hTest3 with TestVar3 in df3" << std::endl;
	TFile *tfOut = TFile::Open(pathFileOut.c_str(), "RECREATE");
	tfOut->cd();
	// 30 bins over [0, 3); the histogram is written to the output file opened just above.
	df3.Histo1D({ "hTest3", "", 30, 0., 3. }, "TestVar3")->Write();
	std::cerr << "Closing the output file" << std::endl;
	tfOut->Close();
	// NOTE(review): all three snapshots target the same pathFileOut with default
	// options; presumably each call recreates the file — confirm against the
	// Snapshot / RSnapshotOptions documentation.
	std::cerr << "Snapshotting df3 {TestVar3} as tree3 into the output file" << std::endl;
	df3.Snapshot("tree3", pathFileOut, {"TestVar3"});
	std::cerr << "Snapshotting df3 {TestVar2} as tree2 into the output file" << std::endl;
	df3.Snapshot("tree2", pathFileOut, {"TestVar2"});
	std::cerr << "Snapshotting df3 {TestVar1} as tree1 into the output file" << std::endl;
	df3.Snapshot("tree1", pathFileOut, {"TestVar1"});
	std::cerr << "Closing the input file" << std::endl;
	tfIn->Close();
	std::cerr << "Complete!" << std::endl;
}

/// Entry point for a standalone build.
///
/// Forwards up to two command-line arguments to reproduce():
///   argv[1] — input file path, argv[2] — output file path.
/// Any argument that is not supplied falls back to reproduce()'s default.
///
/// @return 0 after reproduce() returns.
int main(int argc, char* argv[]) {
	// argv[argc] is a null pointer, and building a std::string from a null
	// pointer is undefined behaviour, so argv[2] is only forwarded when it
	// was actually supplied.
	if (argc > 2) {
		reproduce(argv[1], argv[2]);
	} else if (argc > 1) {
		reproduce(argv[1]);
	} else {
		reproduce();
	}
	return 0;
}
  2. Run:

    $ root -l -q 'gen_tree.C++()'
    $ root -l -q 'reproduce.C++()'
  3. See outputs and errors.

    • Duplicated column names
    Columns: {TestVar2, TestVar1, TestVar3, TestVar1.fUniqueID, TestVar1.fBits, TestVar1.fP, TestVar1.fE, TestVar1, nTestVar1, }
    
    • Incorrect GetSize()
    Generating histogram hTest3 with TestVar3 in df3
    TestVar1.GetSize(): 1000        n: 5
    TestVar1.GetSize(): 1000        n: 1
    TestVar1.GetSize(): 1000        n: 7
    TestVar1.GetSize(): 1000        n: 0
    
    • Snapshot error happens to TestVar1, but not TestVar2 or TestVar3 (superseded by the corrected statement below)
      Corrected: Snapshot error happens to both TestVar1 and TestVar2, but not TestVar3
    • Full outputs:
$ root -l -q 'gen_tree.C++()'

Processing gen_tree.C++()...
Info in <TUnixSystem::ACLiC>: creating shared library /run/media/root/data-btrfs/shamrock-shared/targets/Research_LL/ctau-proper/lxplus_HTcondor/preselect/redefine_custom_vector.root/./gen_tree_C.so
Warning in <TTree::Bronch>: Using split mode on a class: TLorentzVector with a custom Streamer
jEntry: 0	nVar: 5	size: 5
(0.780836, 0.994304, 0.974700, -1.584510) (0.888099, 0.785263, 0.905259, -1.305239) (0.862221, 0.008688, 0.320079, -0.430004) (0.508159, 0.227104, 0.549878, -0.729431) (0.994887, 0.949317, 0.458636, -1.254915) 
jEntry: 1	nVar: 5	size: 5
(0.651535, 0.070535, 0.859432, -0.669713) (0.848072, 0.268093, 0.718520, -1.046939) (0.307848, 0.268393, 0.240499, -0.365170) (0.243093, 0.434697, 0.190251, -0.532397) (0.339955, 0.912194, 0.890578, -1.272495) 
jEntry: 2	nVar: 5	size: 5
(0.703626, 0.856803, 0.459377, -1.173015) (0.139822, 0.454264, 0.202821, 0.301843) (0.179893, 0.588359, 0.249993, -0.615069) (0.447791, 0.098274, 0.098500, 0.540937) (0.816794, 0.558164, 0.023732, -0.985904) 
jEntry: 3	nVar: 3	size: 3
(0.325884, 0.328280, 0.233360, -0.045052) (0.356636, 0.573315, 0.428329, -0.760415) (0.921959, 0.660601, 0.950840, -1.258582) 
jEntry: 4	nVar: 0	size: 0

jEntry: 5	nVar: 1	size: 1
(0.918584, 0.574242, 0.407165, -1.035375) 
jEntry: 6	nVar: 7	size: 7
(0.995525, 0.767034, 0.004563, -1.175969) (0.865308, 0.103064, 0.159200, -0.565209) (0.661228, 0.182932, 0.767290, -0.920826) (0.508816, 0.095570, 0.693251, -0.864872) (0.452207, 0.266567, 0.453281, 0.090425) (0.188525, 0.113883, 0.650261, 0.681392) (0.233861, 0.854605, 0.885871, -0.957485) 
jEntry: 7	nVar: 5	size: 5
(0.402912, 0.562238, 0.257295, -0.718170) (0.566801, 0.700610, 0.035254, -0.603856) (0.859810, 0.717357, 0.331093, -1.166906) (0.484647, 0.790985, 0.551558, -0.910002) (0.484236, 0.576510, 0.032424, -0.064769) 
jEntry: 8	nVar: 0	size: 0

jEntry: 9	nVar: 2	size: 2
(0.939328, 0.143674, 0.382106, -0.476480) (0.377535, 0.236711, 0.792486, -0.890021) 

$ root -l -q 'reproduce.C++()'

Processing reproduce.C++()...
Info in <TUnixSystem::ACLiC>: creating shared library /run/media/root/data-btrfs/shamrock-shared/targets/Research_LL/ctau-proper/lxplus_HTcondor/preselect/redefine_custom_vector.root/./reproduce_C.so
Defining TestVar2 from TestVar1: TLorentzVector -> LorentzVectorRedefining TestVar1: TLorentzVector -> LorentzVector
Creating a custom column TestVar2
Columns: {TestVar2, TestVar1, TestVar3, TestVar1.fUniqueID, TestVar1.fBits, TestVar1.fP, TestVar1.fE, TestVar1, nTestVar1, }
Generating histogram hTest3 with TestVar3 in df3
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 3
TestVar1.GetSize(): 1000	n: 0
TestVar1.GetSize(): 1000	n: 1
TestVar1.GetSize(): 1000	n: 7
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 0
TestVar1.GetSize(): 1000	n: 2
Closing the output file
Snapshotting df3 {TestVar3} as tree3 into the output file
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 3
TestVar1.GetSize(): 1000	n: 0
TestVar1.GetSize(): 1000	n: 1
TestVar1.GetSize(): 1000	n: 7
TestVar1.GetSize(): 1000	n: 5
TestVar1.GetSize(): 1000	n: 0
TestVar1.GetSize(): 1000	n: 2
Snapshotting df3 {TestVar2} as tree2 into the output file
TestVar1.GetSize(): 1000	n: 5
Error in <TTree::Branch>: The class requested (ROOT::VecOps::RVec<ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double> > >) for the branch "TestVar2" is an instance of an stl collection and does not have a compiled CollectionProxy. Please generate the dictionary for this collection (ROOT::VecOps::RVec<ROOT::Math::LorentzVector<ROOT::Math::PtEtaPhiM4D<double> > >) to avoid to write corrupted data.
RDataFrame::Run: event loop was interrupted
terminate called after throwing an instance of 'std::logic_error'
  what():  Trying to insert a null branch address.

Setup

  1. ROOT version: 6.25/01, built against commit 24ea49d on the master branch.
    ROOT Version: 6.25/01
    Built for linuxx8664gcc on Jan 01 1980, 00:00:00
    From @
    
  2. Operating system: NixOS
    $ nix-info -m
     - system: `"x86_64-linux"`
     - host os: `Linux 5.10.73, NixOS, 21.05.20211019.8fe3b97 (Okapi)`
     - multi-user?: `yes`
     - sandbox: `yes`
     - version: `nix-env (Nix) 2.4pre20211006_53e4794`
     - nixpkgs: `/nix/var/nix/profiles/per-user/root/channels/nixos`
  3. How you obtained ROOT:
    Custom build with the Nix package manager against Nixpkgs revision 60e709069fa2fbcfca943d8f6df8dc6d33c67025 (a custom fork of the master branch of NixOS/Nixpkgs), with the ROOT source overridden. The gcc version is 10.3.0.

Additional context

Nix Flake setup to build ROOT:

flake.nix

# Flake that builds ROOT from the `root-source` input using the package
# definition of the `nixpkgs-root` fork, and exposes a dev shell, the tool
# packages, and a `run` wrapper script (the default package).
{
  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-21.05";
  inputs.nixpkgs-root.url = "github:ShamrockLee/nixpkgs/root-6-25";
  inputs.flake-utils.url = "github:numtide/flake-utils";
  inputs.root-source.url = "github:root-project/root/master";
  # Declared as a non-flake input; it is used below as the `src` of the root package.
  inputs.root-source.flake = false;
  outputs = inputs@{self, nixpkgs, nixpkgs-root, flake-utils, root-source, ...}: flake-utils.lib.eachDefaultSystem (system: let
    pkgs = nixpkgs.legacyPackages.${system};
    # Package set from the nixpkgs-root fork with the `root` package overridden.
    pkgs-root = import nixpkgs-root {
      inherit system;
      overlays = [
        (final: prev: {
          # Override root: take the source from `root-source`, flip the imt and
          # ssl CMake options from OFF to ON, and append a RelWithDebInfo build type.
          root = prev.root.overrideAttrs (oldAttrs: {
            version = "2021-09-01";
            src = root-source;
            cmakeFlags = (map (oldFlag:
              if oldFlag == "-Dimt=OFF" then "-Dimt=ON"
              else if oldFlag == "-Dssl=OFF" then "-Dssl=ON"
              # else if oldFlag == "-Dgfal=OFF" then "-Dgfal=ON"
              # else if oldFlag == "-Dxrootd=OFF" then "-Dxrootd=ON"
              else oldFlag
            ) oldAttrs.cmakeFlags) ++ [
              "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
            ];
            buildInputs = oldAttrs.buildInputs ++ (with pkgs-root; [
              tbb # for implicit multithreading
              openssl # for ssl support
            ]);
          });
        })
      ];
    };
    # Development shell: the overridden root plus compiler/build tools from
    # pkgs-root, and gawk/git from the plain nixpkgs input.
    devShell = pkgs.mkShell {
      buildInputs = (with pkgs-root; [
        root
      ]);
      nativeBuildInputs = (with pkgs-root; [
        gcc
        gnumake
        cmake
      ]) ++ (with pkgs;[
        gawk
        gitAndTools.gitFull
      ]);
    };
    # Packages re-exported under `packages` and put on PATH by `run`.
    packagesSub = {
      inherit (pkgs-root) root gcc gnumake cmake;
      inherit (pkgs) gawk;
      inherit (pkgs.gitAndTools) git gitFull;
    };
    # Wrapper script: prepends the tools' bin directories to PATH, sources
    # `devShell.shellHook` when that string is non-empty, then execs its arguments.
    run = pkgs.writeShellScriptBin "run" ''
      export PATH="${ with packagesSub; pkgs.lib.makeBinPath [ root gcc gnumake cmake gawk gitFull ]}:$PATH"
      if test -n "${devShell.shellHook}"; then
        . "${devShell.shellHook}";
      fi
      exec "$@"
    '';
    # Built from ./ana.nix (not shown here) with the overridden root passed in.
    ana = pkgs.callPackage ./ana.nix { inherit (packagesSub) root; };
  in{
    legacyPackages = pkgs;
    legacyPackages-root = pkgs-root;
    inherit devShell;
    defaultPackage = run;
    packages = packagesSub // {
      srcRaw = self;
      inherit run ana;
    };
  });
}

flake.lock

{
  "nodes": {
    "flake-utils": {
      "locked": {
        "lastModified": 1623875721,
        "narHash": "sha256-A8BU7bjS5GirpAUv4QA+QnJ4CceLHkcXdRp4xITDB0s=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "f7e004a55b120c02ecb6219596820fcd32ca8772",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1628696776,
        "narHash": "sha256-K8+/IQNZDZltIu/KSf+Tz01OwNzTGgTch0ktMOWcxLg=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "927ce1afc1db40869a463a37ea2738c27d425f80",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-21.05",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "nixpkgs-root": {
      "locked": {
        "lastModified": 1628672385,
        "narHash": "sha256-Xb/yIAo1OK8OEwrtd5SP3bjAD06PLXsrWG3WTLKqme0=",
        "owner": "ShamrockLee",
        "repo": "nixpkgs",
        "rev": "60e709069fa2fbcfca943d8f6df8dc6d33c67025",
        "type": "github"
      },
      "original": {
        "owner": "ShamrockLee",
        "ref": "root-6-25",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs",
        "nixpkgs-root": "nixpkgs-root",
        "root-source": "root-source"
      }
    },
    "root-source": {
      "flake": false,
      "locked": {
        "lastModified": 1630489941,
        "narHash": "sha256-HCUYlIz479asnYAD9wwKtFXJILKn/PrbT4mro6DoRMo=",
        "owner": "root-project",
        "repo": "root",
        "rev": "24ea49d4702ba0aa9b1ea5521950520154ee151c",
        "type": "github"
      },
      "original": {
        "owner": "root-project",
        "ref": "master",
        "repo": "root",
        "type": "github"
      }
    }
  },
  "root": "root",
  "version": 7
}

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions