Skip to content

Commit 7c46245

Browse files
committed
Merged: Fix out-of-range access in unibrow::Utf8::CalculateValue.
Revision: 9d524bd BUG=chromium:662822,chromium:667260 LOG=N NOTRY=true NOPRESUBMIT=true NOTREECHECKS=true [email protected] Review URL: https://codereview.chromium.org/2521933002 . Cr-Commit-Position: refs/branch-heads/5.5@{#54} Cr-Branched-From: 3cbd583-refs/heads/5.5.372@{#1} Cr-Branched-From: b3c8b0c-refs/heads/master@{#40015}
1 parent 43ab8dc commit 7c46245

File tree

5 files changed

+52
-14
lines changed

5 files changed

+52
-14
lines changed

src/unicode-decoder.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77

88
#include <sys/types.h>
99
#include "src/globals.h"
10+
#include "src/utils.h"
1011

1112
namespace unibrow {
1213

13-
class Utf8DecoderBase {
14+
class V8_EXPORT_PRIVATE Utf8DecoderBase {
1415
public:
1516
// Initialization done in subclass.
1617
inline Utf8DecoderBase();

src/unicode.cc

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
235235
while (count < max_count && IsContinuationCharacter(str[count])) {
236236
count++;
237237
}
238+
*cursor += count;
238239

239-
// Check overly long sequences & other conditions. Use length as error
240-
// indicator.
240+
// There must be enough continuation characters.
241+
if (count != length) return kBadChar;
242+
243+
// Check overly long sequences & other conditions.
241244
if (length == 3) {
242245
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
243246
// Overlong three-byte sequence?
244-
length = 0;
247+
return kBadChar;
245248
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
246249
// High and low surrogate halves?
247-
length = 0;
250+
return kBadChar;
248251
}
249252
} else if (length == 4) {
250253
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
251254
// Overlong four-byte sequence.
252-
length = 0;
255+
return kBadChar;
253256
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
254257
// Code points outside of the unicode range.
255-
length = 0;
258+
return kBadChar;
256259
}
257260
}
258261

259-
if (count != length) {
260-
// All invalid encodings should land here.
261-
*cursor += count;
262-
return kBadChar;
263-
}
264-
265262
// All errors have been handled, so we only have to assemble the result.
266-
*cursor += length;
267263
switch (length) {
268264
case 1:
269265
return str[0];

test/unittests/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ v8_executable("unittests") {
117117
"source-position-table-unittest.cc",
118118
"test-utils.cc",
119119
"test-utils.h",
120+
"unicode-unittest.cc",
120121
"value-serializer-unittest.cc",
121122
"wasm/asm-types-unittest.cc",
122123
"wasm/ast-decoder-unittest.cc",

test/unittests/unicode-unittest.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright 2016 the V8 project authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
#include <memory>
6+
#include <string>
7+
8+
#include "src/unicode-decoder.h"
9+
#include "testing/gtest/include/gtest/gtest.h"
10+
11+
namespace v8 {
12+
namespace internal {
13+
14+
namespace {
15+
16+
using Utf8Decoder = unibrow::Utf8Decoder<512>;
17+
18+
void Decode(Utf8Decoder* decoder, const std::string& str) {
19+
// Put the string in its own buffer on the heap to make sure that
20+
// AddressSanitizer's heap-buffer-overflow logic can see what's going on.
21+
std::unique_ptr<char[]> buffer(new char[str.length()]);
22+
memcpy(buffer.get(), str.data(), str.length());
23+
decoder->Reset(buffer.get(), str.length());
24+
}
25+
26+
} // namespace
27+
28+
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
29+
Utf8Decoder decoder;
30+
31+
// Not enough continuation bytes before string ends.
32+
Decode(&decoder, "\xE0");
33+
Decode(&decoder, "\xED");
34+
Decode(&decoder, "\xF0");
35+
Decode(&decoder, "\xF4");
36+
}
37+
38+
} // namespace internal
39+
} // namespace v8

test/unittests/unittests.gyp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@
115115
'source-position-table-unittest.cc',
116116
'test-utils.h',
117117
'test-utils.cc',
118+
'unicode-unittest.cc',
118119
'value-serializer-unittest.cc',
119120
'wasm/asm-types-unittest.cc',
120121
'wasm/ast-decoder-unittest.cc',

0 commit comments

Comments
 (0)