Skip to content

Commit 30a350f

Browse files
FrankYFTang and Commit Bot
authored and committed
[Intl] Clean up icu include and #ifdef
Requires that ICU 63 or above be used when building V8. 1. Remove unneeded #includes of ICU header files. 2. Remove code inside "#if U_ICU_VERSION_MAJOR_NUM < x" blocks where x is 63 or smaller. Bug: v8:8401 v8:5751 Change-Id: I908b0d7d174df53d4296580fe7150417322b0b21 Reviewed-on: https://chromium-review.googlesource.com/c/1314112 Reviewed-by: Jungshik Shin <[email protected]> Reviewed-by: Yang Guo <[email protected]> Reviewed-by: Sathya Gunasekaran <[email protected]> Commit-Queue: Frank Tang <[email protected]> Cr-Commit-Position: refs/heads/master@{#57341}
1 parent 0a7e08e commit 30a350f

11 files changed

Lines changed: 7 additions & 353 deletions

src/isolate.cc

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,6 @@
6969
#include "src/wasm/wasm-engine.h"
7070
#include "src/wasm/wasm-objects.h"
7171
#include "src/zone/accounting-allocator.h"
72-
#ifdef V8_INTL_SUPPORT
73-
#include "unicode/regex.h"
74-
#endif // V8_INTL_SUPPORT
7572

7673
namespace v8 {
7774
namespace internal {
@@ -2888,19 +2885,6 @@ Isolate::~Isolate() {
28882885
delete date_cache_;
28892886
date_cache_ = nullptr;
28902887

2891-
#ifdef V8_INTL_SUPPORT
2892-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
2893-
delete language_singleton_regexp_matcher_;
2894-
language_singleton_regexp_matcher_ = nullptr;
2895-
2896-
delete language_tag_regexp_matcher_;
2897-
language_tag_regexp_matcher_ = nullptr;
2898-
2899-
delete language_variant_regexp_matcher_;
2900-
language_variant_regexp_matcher_ = nullptr;
2901-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
2902-
#endif // V8_INTL_SUPPORT
2903-
29042888
delete regexp_stack_;
29052889
regexp_stack_ = nullptr;
29062890

src/isolate.h

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,6 @@
3636
#include "src/thread-id.h"
3737
#include "src/unicode.h"
3838

39-
#ifdef V8_INTL_SUPPORT
40-
#include "unicode/uversion.h" // Define U_ICU_NAMESPACE.
41-
// 'icu' does not work. Use U_ICU_NAMESPACE.
42-
namespace U_ICU_NAMESPACE {
43-
44-
class RegexMatcher;
45-
46-
} // namespace U_ICU_NAMESPACE
47-
#endif // V8_INTL_SUPPORT
48-
4939
namespace v8 {
5040

5141
namespace base {
@@ -1190,19 +1180,6 @@ class Isolate final : private HiddenFactory {
11901180
}
11911181

11921182
#ifdef V8_INTL_SUPPORT
1193-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
1194-
icu::RegexMatcher* language_singleton_regexp_matcher() {
1195-
return language_singleton_regexp_matcher_;
1196-
}
1197-
1198-
icu::RegexMatcher* language_tag_regexp_matcher() {
1199-
return language_tag_regexp_matcher_;
1200-
}
1201-
1202-
icu::RegexMatcher* language_variant_regexp_matcher() {
1203-
return language_variant_regexp_matcher_;
1204-
}
1205-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
12061183

12071184
const std::string& default_locale() { return default_locale_; }
12081185

@@ -1211,19 +1188,6 @@ class Isolate final : private HiddenFactory {
12111188
default_locale_ = locale;
12121189
}
12131190

1214-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
1215-
void set_language_tag_regexp_matchers(
1216-
icu::RegexMatcher* language_singleton_regexp_matcher,
1217-
icu::RegexMatcher* language_tag_regexp_matcher,
1218-
icu::RegexMatcher* language_variant_regexp_matcher) {
1219-
DCHECK_NULL(language_singleton_regexp_matcher_);
1220-
DCHECK_NULL(language_tag_regexp_matcher_);
1221-
DCHECK_NULL(language_variant_regexp_matcher_);
1222-
language_singleton_regexp_matcher_ = language_singleton_regexp_matcher;
1223-
language_tag_regexp_matcher_ = language_tag_regexp_matcher;
1224-
language_variant_regexp_matcher_ = language_variant_regexp_matcher;
1225-
}
1226-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
12271191
#endif // V8_INTL_SUPPORT
12281192

12291193
static const int kProtectorValid = 1;
@@ -1766,11 +1730,6 @@ class Isolate final : private HiddenFactory {
17661730
double load_start_time_ms_ = 0;
17671731

17681732
#ifdef V8_INTL_SUPPORT
1769-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
1770-
icu::RegexMatcher* language_singleton_regexp_matcher_ = nullptr;
1771-
icu::RegexMatcher* language_tag_regexp_matcher_ = nullptr;
1772-
icu::RegexMatcher* language_variant_regexp_matcher_ = nullptr;
1773-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
17741733
std::string default_locale_;
17751734
#endif // V8_INTL_SUPPORT
17761735

src/objects/intl-objects.cc

Lines changed: 0 additions & 240 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,8 @@
3232
#include "unicode/normalizer2.h"
3333
#include "unicode/numfmt.h"
3434
#include "unicode/numsys.h"
35-
#include "unicode/regex.h"
36-
#include "unicode/smpdtfmt.h"
3735
#include "unicode/timezone.h"
38-
#include "unicode/ucol.h"
39-
#include "unicode/ures.h"
4036
#include "unicode/ustring.h"
41-
#include "unicode/uvernum.h"
42-
#include "unicode/uversion.h"
4337

4438
namespace v8 {
4539
namespace internal {
@@ -573,124 +567,6 @@ MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
573567
return receiver;
574568
}
575569

576-
namespace {
577-
578-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
579-
// Define general regexp macros.
580-
// Note "(?:" means the regexp group a non-capture group.
581-
#define REGEX_ALPHA "[a-z]"
582-
#define REGEX_DIGIT "[0-9]"
583-
#define REGEX_ALPHANUM "(?:" REGEX_ALPHA "|" REGEX_DIGIT ")"
584-
585-
void BuildLanguageTagRegexps(Isolate* isolate) {
586-
// Define the language tag regexp macros.
587-
// For info on BCP 47 see https://tools.ietf.org/html/bcp47 .
588-
// Because language tags are case insensitive per BCP 47 2.1.1 and regexp's
589-
// defined below will always be used after lowercasing the input, uppercase
590-
// ranges in BCP 47 2.1 are dropped and grandfathered tags are all lowercased.
591-
// clang-format off
592-
#define BCP47_REGULAR \
593-
"(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|" \
594-
"zh-min|zh-min-nan|zh-xiang)"
595-
#define BCP47_IRREGULAR \
596-
"(?:en-gb-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|" \
597-
"i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|" \
598-
"i-tsu|sgn-be-fr|sgn-be-nl|sgn-ch-de)"
599-
#define BCP47_GRANDFATHERED "(?:" BCP47_IRREGULAR "|" BCP47_REGULAR ")"
600-
#define BCP47_PRIVATE_USE "(?:x(?:-" REGEX_ALPHANUM "{1,8})+)"
601-
602-
#define BCP47_SINGLETON "(?:" REGEX_DIGIT "|" "[a-wy-z])"
603-
604-
#define BCP47_EXTENSION "(?:" BCP47_SINGLETON "(?:-" REGEX_ALPHANUM "{2,8})+)"
605-
#define BCP47_VARIANT \
606-
"(?:" REGEX_ALPHANUM "{5,8}" "|" "(?:" REGEX_DIGIT REGEX_ALPHANUM "{3}))"
607-
608-
#define BCP47_REGION "(?:" REGEX_ALPHA "{2}" "|" REGEX_DIGIT "{3})"
609-
#define BCP47_SCRIPT "(?:" REGEX_ALPHA "{4})"
610-
#define BCP47_EXT_LANG "(?:" REGEX_ALPHA "{3}(?:-" REGEX_ALPHA "{3}){0,2})"
611-
#define BCP47_LANGUAGE "(?:" REGEX_ALPHA "{2,3}(?:-" BCP47_EXT_LANG ")?" \
612-
"|" REGEX_ALPHA "{4}" "|" REGEX_ALPHA "{5,8})"
613-
#define BCP47_LANG_TAG \
614-
BCP47_LANGUAGE \
615-
"(?:-" BCP47_SCRIPT ")?" \
616-
"(?:-" BCP47_REGION ")?" \
617-
"(?:-" BCP47_VARIANT ")*" \
618-
"(?:-" BCP47_EXTENSION ")*" \
619-
"(?:-" BCP47_PRIVATE_USE ")?"
620-
// clang-format on
621-
622-
constexpr char kLanguageTagSingletonRegexp[] = "^" BCP47_SINGLETON "$";
623-
constexpr char kLanguageTagVariantRegexp[] = "^" BCP47_VARIANT "$";
624-
constexpr char kLanguageTagRegexp[] =
625-
"^(?:" BCP47_LANG_TAG "|" BCP47_PRIVATE_USE "|" BCP47_GRANDFATHERED ")$";
626-
627-
UErrorCode status = U_ZERO_ERROR;
628-
icu::RegexMatcher* language_singleton_regexp_matcher = new icu::RegexMatcher(
629-
icu::UnicodeString(kLanguageTagSingletonRegexp, -1, US_INV), 0, status);
630-
icu::RegexMatcher* language_tag_regexp_matcher = new icu::RegexMatcher(
631-
icu::UnicodeString(kLanguageTagRegexp, -1, US_INV), 0, status);
632-
icu::RegexMatcher* language_variant_regexp_matcher = new icu::RegexMatcher(
633-
icu::UnicodeString(kLanguageTagVariantRegexp, -1, US_INV), 0, status);
634-
CHECK(U_SUCCESS(status));
635-
636-
isolate->set_language_tag_regexp_matchers(language_singleton_regexp_matcher,
637-
language_tag_regexp_matcher,
638-
language_variant_regexp_matcher);
639-
// Undefine the language tag regexp macros.
640-
#undef BCP47_EXTENSION
641-
#undef BCP47_EXT_LANG
642-
#undef BCP47_GRANDFATHERED
643-
#undef BCP47_IRREGULAR
644-
#undef BCP47_LANG_TAG
645-
#undef BCP47_LANGUAGE
646-
#undef BCP47_PRIVATE_USE
647-
#undef BCP47_REGION
648-
#undef BCP47_REGULAR
649-
#undef BCP47_SCRIPT
650-
#undef BCP47_SINGLETON
651-
#undef BCP47_VARIANT
652-
}
653-
654-
// Undefine the general regexp macros.
655-
#undef REGEX_ALPHA
656-
#undef REGEX_DIGIT
657-
#undef REGEX_ALPHANUM
658-
659-
icu::RegexMatcher* GetLanguageSingletonRegexMatcher(Isolate* isolate) {
660-
icu::RegexMatcher* language_singleton_regexp_matcher =
661-
isolate->language_singleton_regexp_matcher();
662-
if (language_singleton_regexp_matcher == nullptr) {
663-
BuildLanguageTagRegexps(isolate);
664-
language_singleton_regexp_matcher =
665-
isolate->language_singleton_regexp_matcher();
666-
}
667-
return language_singleton_regexp_matcher;
668-
}
669-
670-
icu::RegexMatcher* GetLanguageTagRegexMatcher(Isolate* isolate) {
671-
icu::RegexMatcher* language_tag_regexp_matcher =
672-
isolate->language_tag_regexp_matcher();
673-
if (language_tag_regexp_matcher == nullptr) {
674-
BuildLanguageTagRegexps(isolate);
675-
language_tag_regexp_matcher = isolate->language_tag_regexp_matcher();
676-
}
677-
return language_tag_regexp_matcher;
678-
}
679-
680-
icu::RegexMatcher* GetLanguageVariantRegexMatcher(Isolate* isolate) {
681-
icu::RegexMatcher* language_variant_regexp_matcher =
682-
isolate->language_variant_regexp_matcher();
683-
if (language_variant_regexp_matcher == nullptr) {
684-
BuildLanguageTagRegexps(isolate);
685-
language_variant_regexp_matcher =
686-
isolate->language_variant_regexp_matcher();
687-
}
688-
return language_variant_regexp_matcher;
689-
}
690-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
691-
692-
} // anonymous namespace
693-
694570
Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
695571
const char* property,
696572
std::vector<const char*> values,
@@ -776,111 +652,6 @@ char AsciiToLower(char c) {
776652
return c | (1 << 5);
777653
}
778654

779-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
780-
/**
781-
* Check the structural Validity of the language tag per ECMA 402 6.2.2:
782-
* - Well-formed per RFC 5646 2.1
783-
* - There are no duplicate variant subtags
784-
* - There are no duplicate singleton (extension) subtags
785-
*
786-
* One extra-check is done (from RFC 5646 2.2.9): the tag is compared
787-
* against the list of grandfathered tags. However, subtags for
788-
* primary/extended language, script, region, variant are not checked
789-
* against the IANA language subtag registry.
790-
*
791-
* ICU 62 or earlier is too permissible and lets invalid tags, like
792-
* hant-cmn-cn, through.
793-
*
794-
* Returns false if the language tag is invalid.
795-
*/
796-
bool IsStructurallyValidLanguageTag(Isolate* isolate,
797-
const std::string& locale_in) {
798-
if (!String::IsAscii(locale_in.c_str(),
799-
static_cast<int>(locale_in.length()))) {
800-
return false;
801-
}
802-
std::string locale(locale_in);
803-
icu::RegexMatcher* language_tag_regexp_matcher =
804-
GetLanguageTagRegexMatcher(isolate);
805-
806-
// Check if it's well-formed, including grandfathered tags.
807-
icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
808-
// Note: icu::RegexMatcher::reset does not make a copy of the input string
809-
// so cannot use a temp value; ie: cannot create it as a call parameter.
810-
language_tag_regexp_matcher->reset(locale_uni);
811-
UErrorCode status = U_ZERO_ERROR;
812-
bool is_valid_lang_tag = language_tag_regexp_matcher->matches(status);
813-
if (!is_valid_lang_tag || V8_UNLIKELY(U_FAILURE(status))) {
814-
return false;
815-
}
816-
817-
// Just return if it's a x- form. It's all private.
818-
if (locale.find("x-") == 0) {
819-
return true;
820-
}
821-
822-
// Check if there are any duplicate variants or singletons (extensions).
823-
824-
// Remove private use section.
825-
locale = locale.substr(0, locale.find("-x-"));
826-
827-
// Skip language since it can match variant regex, so we start from 1.
828-
// We are matching i-klingon here, but that's ok, since i-klingon-klingon
829-
// is not valid and would fail LANGUAGE_TAG_RE test.
830-
size_t pos = 0;
831-
std::vector<std::string> parts;
832-
while ((pos = locale.find('-')) != std::string::npos) {
833-
std::string token = locale.substr(0, pos);
834-
parts.push_back(token);
835-
locale = locale.substr(pos + 1);
836-
}
837-
if (locale.length() != 0) {
838-
parts.push_back(locale);
839-
}
840-
841-
icu::RegexMatcher* language_variant_regexp_matcher =
842-
GetLanguageVariantRegexMatcher(isolate);
843-
844-
icu::RegexMatcher* language_singleton_regexp_matcher =
845-
GetLanguageSingletonRegexMatcher(isolate);
846-
847-
std::vector<std::string> variants;
848-
std::vector<std::string> extensions;
849-
for (auto it = parts.begin() + 1; it != parts.end(); it++) {
850-
icu::UnicodeString part(it->data(), -1, US_INV);
851-
language_variant_regexp_matcher->reset(part);
852-
bool is_language_variant = language_variant_regexp_matcher->matches(status);
853-
if (V8_UNLIKELY(U_FAILURE(status))) {
854-
return false;
855-
}
856-
if (is_language_variant && extensions.size() == 0) {
857-
if (std::find(variants.begin(), variants.end(), *it) == variants.end()) {
858-
variants.push_back(*it);
859-
} else {
860-
return false;
861-
}
862-
}
863-
864-
language_singleton_regexp_matcher->reset(part);
865-
bool is_language_singleton =
866-
language_singleton_regexp_matcher->matches(status);
867-
if (V8_UNLIKELY(U_FAILURE(status))) {
868-
return false;
869-
}
870-
if (is_language_singleton) {
871-
if (std::find(extensions.begin(), extensions.end(), *it) ==
872-
extensions.end()) {
873-
extensions.push_back(*it);
874-
} else {
875-
return false;
876-
}
877-
}
878-
}
879-
880-
return true;
881-
}
882-
#endif // USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
883-
884655
// Returns true iff |c| is an ASCII lowercase letter, i.e. lies in the
// inclusive range 'a'..'z'. The upper bound is inclusive so 'z' is accepted.
bool IsLowerAscii(char c) { return c >= 'a' && c <= 'z'; }
885656

886657
bool IsTwoLetterLanguage(const std::string& locale) {
@@ -953,15 +724,6 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
953724
// the input before any more check.
954725
std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
955726

956-
#if USE_CHROMIUM_ICU == 0 && U_ICU_VERSION_MAJOR_NUM < 63
957-
if (!IsStructurallyValidLanguageTag(isolate, locale)) {
958-
THROW_NEW_ERROR_RETURN_VALUE(
959-
isolate,
960-
NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
961-
Nothing<std::string>());
962-
}
963-
#endif
964-
965727
// ICU maps a few grandfathered tags to what looks like a regular language
966728
// tag even though IANA language tag registry does not have a preferred
967729
// entry map for them. Return them as they're with lowercasing.
@@ -986,9 +748,7 @@ Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
986748
uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY,
987749
&parsed_length, &error);
988750
if (U_FAILURE(error) ||
989-
#if USE_CHROMIUM_ICU == 1 || U_ICU_VERSION_MAJOR_NUM >= 63
990751
static_cast<size_t>(parsed_length) < locale.length() ||
991-
#endif
992752
error == U_STRING_NOT_TERMINATED_WARNING) {
993753
THROW_NEW_ERROR_RETURN_VALUE(
994754
isolate,

src/objects/js-date-time-format.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "unicode/calendar.h"
2121
#include "unicode/dtptngen.h"
2222
#include "unicode/gregocal.h"
23-
#include "unicode/numsys.h"
2423
#include "unicode/smpdtfmt.h"
2524
#include "unicode/unistr.h"
2625

src/objects/js-date-time-format.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "src/isolate.h"
1616
#include "src/objects/intl-objects.h"
1717
#include "src/objects/managed.h"
18+
#include "unicode/uversion.h"
1819

1920
// Has to be the last include (doesn't have include guards):
2021
#include "src/objects/object-macros.h"

0 commit comments

Comments
 (0)