Skip to content

Commit 3b47cba

Browse files
trabettihtuch
authored andcommitted
added histogram latency information to Hystrix dashboard stream (#3986)
Adding the latency information from existing histogram to Hystrix event stream that was added in #3425 Risk Level: Low Testing: Docs Changes: admin.rst Release Notes: No addition to release note of original Hystrix stream PR, which is still pending for 1.8.0 Fixes #3753 Signed-off-by: trabetti <[email protected]>
1 parent cf87d50 commit 3b47cba

File tree

8 files changed

+221
-53
lines changed

8 files changed

+221
-53
lines changed

docs/root/operations/admin.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ The fields are:
352352
In Envoy, service unavailable response will cause **outlier detection** - removing a node off the
353353
load balancer pool, but requests are not rejected as a result. Therefore, this counter is always
354354
set to '0'.
355-
* Latency information is currently unavailable.
355+
* Latency information represents data since last flush.
356+
Mean latency is currently not available.
356357

357358

source/common/stats/histogram_impl.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ HistogramStatisticsImpl::HistogramStatisticsImpl(const histogram_t* histogram_pt
1717
}
1818

1919
const std::vector<double>& HistogramStatisticsImpl::supportedQuantiles() const {
20-
static const std::vector<double> supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
21-
0.95, 0.99, 0.999, 1};
20+
static const std::vector<double> supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
21+
0.95, 0.99, 0.995, 0.999, 1};
2222
return supported_quantiles;
2323
}
2424

source/extensions/stat_sinks/hystrix/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ envoy_cc_library(
3535
"//include/envoy/stats:stats_interface",
3636
"//source/common/buffer:buffer_lib",
3737
"//source/common/common:logger_lib",
38+
"//source/common/config:well_known_names",
3839
"//source/common/http:headers_lib",
3940
],
4041
)

source/extensions/stat_sinks/hystrix/hystrix.cc

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,19 @@
99

1010
#include "common/buffer/buffer_impl.h"
1111
#include "common/common/logger.h"
12+
#include "common/config/well_known_names.h"
1213
#include "common/http/headers.h"
1314

1415
#include "absl/strings/str_cat.h"
16+
#include "absl/strings/str_split.h"
17+
#include "fmt/printf.h"
1518

1619
namespace Envoy {
1720
namespace Extensions {
1821
namespace StatSinks {
1922
namespace Hystrix {
2023

2124
const uint64_t HystrixSink::DEFAULT_NUM_BUCKETS;
22-
2325
ClusterStatsCache::ClusterStatsCache(const std::string& cluster_name)
2426
: cluster_name_(cluster_name) {}
2527

@@ -43,6 +45,19 @@ void ClusterStatsCache::printRollingWindow(absl::string_view name, RollingWindow
4345
out_str << std::endl;
4446
}
4547

48+
void HystrixSink::addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key,
49+
std::stringstream& ss) {
50+
// TODO: Consider if we better use join here
51+
ss << ", \"" << key << "\": {";
52+
bool is_first = true;
53+
for (const std::pair<double, double>& element : latency_map) {
54+
const std::string quantile = fmt::sprintf("%g", element.first * 100);
55+
HystrixSink::addDoubleToStream(quantile, element.second, ss, is_first);
56+
is_first = false;
57+
}
58+
ss << "}";
59+
}
60+
4661
// Add new value to rolling window, in place of oldest one.
4762
void HystrixSink::pushNewValue(RollingWindow& rolling_window, uint64_t value) {
4863
if (rolling_window.empty()) {
@@ -118,6 +133,11 @@ void HystrixSink::addIntToStream(absl::string_view key, uint64_t value, std::str
118133
addInfoToStream(key, std::to_string(value), info, is_first);
119134
}
120135

136+
void HystrixSink::addDoubleToStream(absl::string_view key, double value, std::stringstream& info,
137+
bool is_first) {
138+
addInfoToStream(key, std::to_string(value), info, is_first);
139+
}
140+
121141
void HystrixSink::addInfoToStream(absl::string_view key, absl::string_view value,
122142
std::stringstream& info, bool is_first) {
123143
if (!is_first) {
@@ -131,7 +151,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
131151
absl::string_view cluster_name,
132152
uint64_t max_concurrent_requests, uint64_t reporting_hosts,
133153
std::chrono::milliseconds rolling_window_ms,
134-
std::stringstream& ss) {
154+
const QuantileLatencyMap& histogram, std::stringstream& ss) {
135155

136156
std::time_t currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
137157

@@ -161,7 +181,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
161181
addIntToStream("rollingCountResponsesFromCache", 0, ss);
162182

163183
// Envoy's "circuit breaker" has similar meaning to hystrix's isolation
164-
// so we count upstream_rq_pending_overflow and present it as ss
184+
// so we count upstream_rq_pending_overflow and present it as rollingCountSemaphoreRejected
165185
addIntToStream("rollingCountSemaphoreRejected", rejected, ss);
166186

167187
// Hystrix's short circuit is not similar to Envoy's since it is triggered by 503 responses
@@ -173,12 +193,8 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
173193
addIntToStream("rollingCountTimeout", timeouts, ss);
174194
addIntToStream("rollingCountBadRequests", 0, ss);
175195
addIntToStream("currentConcurrentExecutionCount", 0, ss);
176-
addIntToStream("latencyExecute_mean", 0, ss);
177-
178-
// TODO trabetti : add histogram information once available by PR #2932
179-
addInfoToStream(
180-
"latencyExecute",
181-
"{\"0\":0,\"25\":0,\"50\":0,\"75\":0,\"90\":0,\"95\":0,\"99\":0,\"99.5\":0,\"100\":0}", ss);
196+
addStringToStream("latencyExecute_mean", "null", ss);
197+
addHistogramToStream(histogram, "latencyExecute", ss);
182198
addIntToStream("propertyValue_circuitBreakerRequestVolumeThreshold", 0, ss);
183199
addIntToStream("propertyValue_circuitBreakerSleepWindowInMilliseconds", 0, ss);
184200
addIntToStream("propertyValue_circuitBreakerErrorThresholdPercentage", 0, ss);
@@ -230,10 +246,11 @@ void HystrixSink::addClusterStatsToStream(ClusterStatsCache& cluster_stats_cache
230246
uint64_t max_concurrent_requests,
231247
uint64_t reporting_hosts,
232248
std::chrono::milliseconds rolling_window_ms,
249+
const QuantileLatencyMap& histogram,
233250
std::stringstream& ss) {
234251

235252
addHystrixCommand(cluster_stats_cache, cluster_name, max_concurrent_requests, reporting_hosts,
236-
rolling_window_ms, ss);
253+
rolling_window_ms, histogram, ss);
237254
addHystrixThreadPool(cluster_name, max_concurrent_requests, reporting_hosts, rolling_window_ms,
238255
ss);
239256
}
@@ -299,13 +316,46 @@ Http::Code HystrixSink::handlerHystrixEventStream(absl::string_view,
299316
return Http::Code::OK;
300317
}
301318

302-
void HystrixSink::flush(Stats::Source&) {
319+
void HystrixSink::flush(Stats::Source& source) {
303320
if (callbacks_list_.empty()) {
304321
return;
305322
}
306323
incCounter();
307324
std::stringstream ss;
308325
Upstream::ClusterManager::ClusterInfoMap clusters = server_.clusterManager().clusters();
326+
327+
// Save a map of the relevant histograms per cluster in a convenient format.
328+
std::unordered_map<std::string, QuantileLatencyMap> time_histograms;
329+
for (const Stats::ParentHistogramSharedPtr& histogram : source.cachedHistograms()) {
330+
if (histogram->tagExtractedName() == "cluster.upstream_rq_time") {
331+
// TODO(mrice32): add an Envoy utility function to look up and return a tag for a metric.
332+
auto it = std::find_if(histogram->tags().begin(), histogram->tags().end(),
333+
[](const Stats::Tag& tag) {
334+
return (tag.name_ == Config::TagNames::get().CLUSTER_NAME);
335+
});
336+
337+
// Make sure we found the cluster name tag
338+
ASSERT(it != histogram->tags().end());
339+
auto it_bool_pair = time_histograms.emplace(std::make_pair(it->value_, QuantileLatencyMap()));
340+
// Make sure histogram with this name was not already added
341+
ASSERT(it_bool_pair.second);
342+
QuantileLatencyMap& hist_map = it_bool_pair.first->second;
343+
344+
const std::vector<double>& supported_quantiles =
345+
histogram->intervalStatistics().supportedQuantiles();
346+
for (size_t i = 0; i < supported_quantiles.size(); ++i) {
347+
// binary-search here is likely not worth it, as hystrix_quantiles has <10 elements.
348+
if (std::find(hystrix_quantiles.begin(), hystrix_quantiles.end(), supported_quantiles[i]) !=
349+
hystrix_quantiles.end()) {
350+
const double value = histogram->intervalStatistics().computedQuantiles()[i];
351+
if (!std::isnan(value)) {
352+
hist_map[supported_quantiles[i]] = value;
353+
}
354+
}
355+
}
356+
}
357+
}
358+
309359
for (auto& cluster : clusters) {
310360
Upstream::ClusterInfoConstSharedPtr cluster_info = cluster.second.get().info();
311361

@@ -323,7 +373,7 @@ void HystrixSink::flush(Stats::Source&) {
323373
*cluster_stats_cache_ptr, cluster_info->name(),
324374
cluster_info->resourceManager(Upstream::ResourcePriority::Default).pendingRequests().max(),
325375
cluster_info->statsScope().gauge("membership_total").value(), server_.statsFlushInterval(),
326-
ss);
376+
time_histograms[cluster_info->name()], ss);
327377
}
328378

329379
Buffer::OwnedImpl data;

source/extensions/stat_sinks/hystrix/hystrix.h

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ namespace Hystrix {
1818
typedef std::vector<uint64_t> RollingWindow;
1919
typedef std::map<const std::string, RollingWindow> RollingStatsMap;
2020

21+
using QuantileLatencyMap = std::unordered_map<double, double>;
22+
static const std::vector<double> hystrix_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
23+
0.95, 0.99, 0.995, 1};
24+
2125
struct {
2226
const std::string AllowHeadersHystrix{"Accept, Cache-Control, X-Requested-With, Last-Event-ID"};
2327
} AccessControlAllowHeadersValue;
@@ -74,7 +78,8 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst
7478
void addClusterStatsToStream(ClusterStatsCache& cluster_stats_cache,
7579
absl::string_view cluster_name, uint64_t max_concurrent_requests,
7680
uint64_t reporting_hosts,
77-
std::chrono::milliseconds rolling_window_ms, std::stringstream& ss);
81+
std::chrono::milliseconds rolling_window_ms,
82+
const QuantileLatencyMap& histogram, std::stringstream& ss);
7883

7984
/**
8085
* Calculate values needed to create the stream and write into the map.
@@ -96,33 +101,44 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst
96101
*/
97102
uint64_t getRollingValue(RollingWindow rolling_window);
98103

99-
private:
100104
/**
101-
* Format the given key and absl::string_view value to "key"="value", and adding to the
105+
* Format the given key and value to "key"=value, and adding to the stringstream.
106+
*/
107+
static void addInfoToStream(absl::string_view key, absl::string_view value,
108+
std::stringstream& info, bool is_first = false);
109+
110+
/**
111+
* Format the given key and double value to "key"=<string of uint64_t>, and adding to the
102112
* stringstream.
103113
*/
104-
void addStringToStream(absl::string_view key, absl::string_view value, std::stringstream& info,
105-
bool is_first = false);
114+
static void addDoubleToStream(absl::string_view key, double value, std::stringstream& info,
115+
bool is_first);
106116

107117
/**
108-
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the
118+
* Format the given key and absl::string_view value to "key"="value", and adding to the
109119
* stringstream.
110120
*/
111-
void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info,
112-
bool is_first = false);
121+
static void addStringToStream(absl::string_view key, absl::string_view value,
122+
std::stringstream& info, bool is_first = false);
113123

114124
/**
115-
* Format the given key and value to "key"=value, and adding to the stringstream.
125+
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the
126+
* stringstream.
116127
*/
117-
void addInfoToStream(absl::string_view key, absl::string_view value, std::stringstream& info,
118-
bool is_first = false);
128+
static void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info,
129+
bool is_first = false);
130+
131+
static void addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key,
132+
std::stringstream& ss);
119133

134+
private:
120135
/**
121136
* Generate HystrixCommand event stream.
122137
*/
123138
void addHystrixCommand(ClusterStatsCache& cluster_stats_cache, absl::string_view cluster_name,
124139
uint64_t max_concurrent_requests, uint64_t reporting_hosts,
125-
std::chrono::milliseconds rolling_window_ms, std::stringstream& ss);
140+
std::chrono::milliseconds rolling_window_ms,
141+
const QuantileLatencyMap& histogram, std::stringstream& ss);
126142

127143
/**
128144
* Generate HystrixThreadPool event stream.

test/common/stats/thread_local_store_test.cc

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,24 @@ class StatsThreadLocalStoreTest : public testing::Test {
4747
std::unique_ptr<ThreadLocalStoreImpl> store_;
4848
};
4949

50+
class HistogramWrapper {
51+
public:
52+
HistogramWrapper() : histogram_(hist_alloc()) {}
53+
54+
~HistogramWrapper() { hist_free(histogram_); }
55+
56+
const histogram_t* getHistogram() { return histogram_; }
57+
58+
void setHistogramValues(const std::vector<uint64_t>& values) {
59+
for (uint64_t value : values) {
60+
hist_insert_intscale(histogram_, value, 0, 1);
61+
}
62+
}
63+
64+
private:
65+
histogram_t* histogram_;
66+
};
67+
5068
class HistogramTest : public testing::Test {
5169
public:
5270
typedef std::map<std::string, ParentHistogramSharedPtr> NameHistogramMap;
@@ -88,15 +106,20 @@ class HistogramTest : public testing::Test {
88106

89107
std::vector<ParentHistogramSharedPtr> histogram_list = store_->histograms();
90108

91-
histogram_t* hist1_cumulative = makeHistogram(h1_cumulative_values_);
92-
histogram_t* hist2_cumulative = makeHistogram(h2_cumulative_values_);
93-
histogram_t* hist1_interval = makeHistogram(h1_interval_values_);
94-
histogram_t* hist2_interval = makeHistogram(h2_interval_values_);
109+
HistogramWrapper hist1_cumulative;
110+
HistogramWrapper hist2_cumulative;
111+
HistogramWrapper hist1_interval;
112+
HistogramWrapper hist2_interval;
95113

96-
HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative);
97-
HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative);
98-
HistogramStatisticsImpl h1_interval_statistics(hist1_interval);
99-
HistogramStatisticsImpl h2_interval_statistics(hist2_interval);
114+
hist1_cumulative.setHistogramValues(h1_cumulative_values_);
115+
hist2_cumulative.setHistogramValues(h2_cumulative_values_);
116+
hist1_interval.setHistogramValues(h1_interval_values_);
117+
hist2_interval.setHistogramValues(h2_interval_values_);
118+
119+
HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative.getHistogram());
120+
HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative.getHistogram());
121+
HistogramStatisticsImpl h1_interval_statistics(hist1_interval.getHistogram());
122+
HistogramStatisticsImpl h2_interval_statistics(hist2_interval.getHistogram());
100123

101124
NameHistogramMap name_histogram_map = makeHistogramMap(histogram_list);
102125
const ParentHistogramSharedPtr& h1 = name_histogram_map["h1"];
@@ -109,11 +132,6 @@ class HistogramTest : public testing::Test {
109132
EXPECT_EQ(h2->intervalStatistics().summary(), h2_interval_statistics.summary());
110133
}
111134

112-
hist_free(hist1_cumulative);
113-
hist_free(hist2_cumulative);
114-
hist_free(hist1_interval);
115-
hist_free(hist2_interval);
116-
117135
h1_interval_values_.clear();
118136
h2_interval_values_.clear();
119137

@@ -133,14 +151,6 @@ class HistogramTest : public testing::Test {
133151
}
134152
}
135153

136-
histogram_t* makeHistogram(const std::vector<uint64_t>& values) {
137-
histogram_t* histogram = hist_alloc();
138-
for (uint64_t value : values) {
139-
hist_insert_intscale(histogram, value, 0, 1);
140-
}
141-
return histogram;
142-
}
143-
144154
MOCK_METHOD1(alloc, RawStatData*(const std::string& name));
145155
MOCK_METHOD1(free, void(RawStatData& data));
146156

@@ -610,9 +620,9 @@ TEST_F(HistogramTest, BasicHistogramSummaryValidate) {
610620

611621
const std::string h1_expected_summary =
612622
"P0: 1, P25: 1.025, P50: 1.05, P75: 1.075, P90: 1.09, P95: 1.095, "
613-
"P99: 1.099, P99.9: 1.0999, P100: 1.1";
614-
const std::string h2_expected_summary =
615-
"P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100";
623+
"P99: 1.099, P99.5: 1.0995, P99.9: 1.0999, P100: 1.1";
624+
const std::string h2_expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, "
625+
"P99: 99, P99.5: 99.5, P99.9: 99.9, P100: 100";
616626

617627
for (size_t i = 0; i < 100; ++i) {
618628
expectCallAndAccumulate(h2, i);
@@ -639,8 +649,8 @@ TEST_F(HistogramTest, BasicHistogramMergeSummary) {
639649
}
640650
EXPECT_EQ(1, validateMerge());
641651

642-
const std::string expected_summary =
643-
"P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100";
652+
const std::string expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: "
653+
"99, P99.5: 99.5, P99.9: 99.9, P100: 100";
644654

645655
NameHistogramMap name_histogram_map = makeHistogramMap(store_->histograms());
646656
EXPECT_EQ(expected_summary, name_histogram_map["h1"]->cumulativeStatistics().summary());

0 commit comments

Comments
 (0)