@@ -71,80 +71,78 @@ bool PerfCounters::IsCounterSupported(const std::string& name) {
7171 return (ret == PFM_SUCCESS);
7272}
7373
74- // Validates all counter names passed, returning only the valid ones
75- static std::vector<std::string> validateCounters (
74+ PerfCounters PerfCounters::Create (
7675 const std::vector<std::string>& counter_names) {
77- // All valid names to be returned
76+ // Valid counters will populate these arrays but we start empty
7877 std::vector<std::string> valid_names;
78+ std::vector<int > counter_ids;
79+ std::vector<int > leader_ids;
80+
81+ // Resize to the maximum possible
82+ valid_names.reserve (counter_names.size ());
83+ counter_ids.reserve (counter_names.size ());
84+
85+ const int kCounterMode = PFM_PLM3; // user mode only
86+
87+ // Group leads will be assigned on demand. The idea is that once we cannot
88+ // create a counter descriptor, the reason is that this group has maxed out
89+ // so we set the group_id again to -1 and retry - giving the algorithm a
90+ // chance to create a new group leader to hold the next set of counters.
91+ int group_id = -1 ;
7992
80- // Loop through all the given names
81- int invalid_counter = 0 ;
82- for (const std::string& name : counter_names) {
83- // Check trivial empty
93+ // Loop through all performance counters
94+ for (size_t i = 0 ; i < counter_names.size (); ++i) {
95+ // we are about to push into the valid names vector
96+ // check if we did not reach the maximum
97+ if (valid_names.size () == PerfCounterValues::kMaxCounters ) {
98+ // Log a message if we maxed out and stop adding
99+ GetErrorLogInstance ()
100+ << counter_names.size () << " counters were requested. The maximum is "
101+ << PerfCounterValues::kMaxCounters << " and " << valid_names.size ()
102+ << " were already added. All remaining counters will be ignored\n " ;
103+ // stop the loop and return what we have already
104+ break ;
105+ }
106+
107+ // Check if this name is empty
108+ const auto & name = counter_names[i];
84109 if (name.empty ()) {
85- GetErrorLogInstance () << " A counter name was the empty string \n " ;
86- invalid_counter++ ;
110+ GetErrorLogInstance ()
111+ << " A performance counter name was the empty string \n " ;
87112 continue ;
88113 }
89- if (PerfCounters::IsCounterSupported (name)) {
90- // we are about to push into the valid names vector
91- // check if we did not reach the maximum
92- if (valid_names.size () == PerfCounterValues::kMaxCounters ) {
93- GetErrorLogInstance ()
94- << counter_names.size ()
95- << " counters were requested. The maximum is "
96- << PerfCounterValues::kMaxCounters << " and "
97- << counter_names.size () - invalid_counter - valid_names.size ()
98- << " will be ignored\n " ;
99- // stop the loop and return what we have already
100- break ;
101- }
102- valid_names.push_back (name);
103- } else {
104- GetErrorLogInstance () << " Performance counter " << name
105- << " incorrect or not supported on this platform\n " ;
106- invalid_counter++;
107- }
108- }
109- // RVO should take care of this
110- return valid_names;
111- }
112-
113- PerfCounters PerfCounters::Create (
114- const std::vector<std::string>& counter_names) {
115- std::vector<std::string> valid_names = validateCounters (counter_names);
116- if (valid_names.empty ()) {
117- return NoCounters ();
118- }
119- std::vector<int > counter_ids (valid_names.size ());
120- std::vector<int > leader_ids;
121114
122- const int mode = PFM_PLM3; // user mode only
123- int group_id = -1 ;
124- for (size_t i = 0 ; i < valid_names.size (); ++i) {
115+ // Here first means first in group, ie the group leader
125116 const bool is_first = (group_id < 0 );
117+
118+ // This struct will be populated by libpfm from the counter string
119+ // and then fed into the syscall perf_event_open
126120 struct perf_event_attr attr {};
127121 attr.size = sizeof (attr);
128- const auto & name = valid_names[i];
122+
123+ // This is the input struct to libpfm.
129124 pfm_perf_encode_arg_t arg{};
130125 arg.attr = &attr;
131-
132- const int pfm_get =
133- pfm_get_os_event_encoding (name.c_str (), mode, PFM_OS_PERF_EVENT, &arg);
126+ const int pfm_get = pfm_get_os_event_encoding (name.c_str (), kCounterMode ,
127+ PFM_OS_PERF_EVENT, &arg);
134128 if (pfm_get != PFM_SUCCESS) {
135- GetErrorLogInstance () << " Unknown counter name: " << name << " \n " ;
136- return NoCounters ();
129+ GetErrorLogInstance ()
130+ << " Unknown performance counter name: " << name << " \n " ;
131+ continue ;
137132 }
138- attr.disabled = is_first;
133+
134+ // We then proceed to populate the remaining fields in our attribute struct
139135 // Note: the man page for perf_event_create suggests inherit = true and
140136 // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
141137 // case.
138+ attr.disabled = is_first;
142139 attr.inherit = true ;
143140 attr.pinned = is_first;
144141 attr.exclude_kernel = true ;
145142 attr.exclude_user = false ;
146143 attr.exclude_hv = true ;
147- // Read all counters in one read.
144+
145+ // Read all counters in a group in one read.
148146 attr.read_format = PERF_FORMAT_GROUP;
149147
150148 int id = -1 ;
@@ -159,36 +157,64 @@ PerfCounters PerfCounters::Create(
159157 }
160158 }
161159 if (id < 0 ) {
162- // We reached a limit perhaps?
160+ // If the file descriptor is negative we might have reached a limit
161+ // in the current group. Set the group_id to -1 and retry
163162 if (group_id >= 0 ) {
164163 // Create a new group
165164 group_id = -1 ;
166165 } else {
167- // Give up, there is nothing else to try
166+ // At this point we have already retried to set a new group id and
167+ // failed. We then give up.
168168 break ;
169169 }
170170 }
171171 }
172+
173+ // We failed to get a new file descriptor. We might have reached a hard
174+ // hardware limit that cannot be resolved even with group multiplexing
172175 if (id < 0 ) {
173- GetErrorLogInstance ()
174- << " Failed to get a file descriptor for " << name << " \n " ;
175- return NoCounters ();
176+ GetErrorLogInstance () << " ***WARNING** Failed to get a file descriptor "
177+ " for performance counter "
178+ << name << " . Ignoring\n " ;
179+
180+ // We give up on this counter but try to keep going
181+ // as the others would be fine
182+ continue ;
176183 }
177184 if (group_id < 0 ) {
178- // This is a leader, store and assign it
185+ // This is a leader, store and assign it to the current file descriptor
179186 leader_ids.push_back (id);
180187 group_id = id;
181188 }
182- counter_ids[i] = id;
189+ // This is a valid counter, add it to our descriptor's list
190+ counter_ids.push_back (id);
191+ valid_names.push_back (name);
183192 }
193+
194+ // Loop through all group leaders activating them
195+ // There is another option of starting ALL counters in a process but
196+ // that would be far reaching an intrusion. If the user is using PMCs
197+ // by themselves then this would have a side effect on them. It is
198+ // friendlier to loop through all groups individually.
184199 for (int lead : leader_ids) {
185200 if (ioctl (lead, PERF_EVENT_IOC_ENABLE) != 0 ) {
186- GetErrorLogInstance () << " Failed to start counters\n " ;
201+ // This should never happen but if it does, we give up on the
202+ // entire batch as recovery would be a mess.
203+ GetErrorLogInstance () << " ***WARNING*** Failed to start counters. "
204+ " Claring out all counters.\n " ;
205+
206+ // Close all peformance counters
207+ for (int id : counter_ids) {
208+ ::close (id);
209+ }
210+
211+ // Return an empty object so our internal state is still good and
212+ // the process can continue normally without impact
187213 return NoCounters ();
188214 }
189215 }
190216
191- return PerfCounters (valid_names, std::move (counter_ids),
217+ return PerfCounters (std::move ( valid_names) , std::move (counter_ids),
192218 std::move (leader_ids));
193219}
194220
@@ -223,34 +249,10 @@ PerfCounters PerfCounters::Create(
223249void PerfCounters::CloseCounters () const {}
224250#endif // defined HAVE_LIBPFM
225251
226- Mutex PerfCountersMeasurement::mutex_;
227- int PerfCountersMeasurement::ref_count_ = 0 ;
228- PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
229-
230- // The validation in PerfCounter::Create will create less counters than passed
231- // so it should be okay to initialize start_values_ and end_values_ with the
232- // upper bound as passed
233252PerfCountersMeasurement::PerfCountersMeasurement (
234253 const std::vector<std::string>& counter_names)
235254 : start_values_(counter_names.size()), end_values_(counter_names.size()) {
236- MutexLock l (mutex_);
237- if (ref_count_ == 0 ) {
238- counters_ = PerfCounters::Create (counter_names);
239- }
240- // We chose to increment it even if `counters_` ends up invalid,
241- // so that we don't keep trying to create, and also since the dtor
242- // will decrement regardless of `counters_`'s validity
243- ++ref_count_;
244-
245- BM_CHECK (!counters_.IsValid () || counters_.names () == counter_names);
246- }
247-
248- PerfCountersMeasurement::~PerfCountersMeasurement () {
249- MutexLock l (mutex_);
250- --ref_count_;
251- if (ref_count_ == 0 ) {
252- counters_ = PerfCounters::NoCounters ();
253- }
255+ counters_ = PerfCounters::Create (counter_names);
254256}
255257
256258PerfCounters& PerfCounters::operator =(PerfCounters&& other) noexcept {
0 commit comments