@@ -436,11 +436,9 @@ bool InitializeICUDirectory(const std::string& path) {
436436
437437int32_t ToUnicode (MaybeStackBuffer<char >* buf,
438438 const char * input,
439- size_t length,
440- bool lenient) {
439+ size_t length) {
441440 UErrorCode status = U_ZERO_ERROR;
442- uint32_t options = UIDNA_DEFAULT;
443- options |= UIDNA_NONTRANSITIONAL_TO_UNICODE;
441+ uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
444442 UIDNA* uidna = uidna_openUTS46 (options, &status);
445443 if (U_FAILURE (status))
446444 return -1 ;
@@ -462,14 +460,10 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
462460 &status);
463461 }
464462
465- // UTS #46's ToUnicode operation applies no validation of domain name length
466- // (nor a flag requesting it to do so, like VerifyDnsLength for ToASCII). For
467- // that reason, unlike ToASCII below, ICU4C correctly accepts long domain
468- // names. However, ICU4C still sets the EMPTY_LABEL error in contrary to UTS
469- // #46. Therefore, explicitly filters out that error here.
470- info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
463+ // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
464+ // string, regardless of whether an error occurred.
471465
472- if (U_FAILURE (status) || (!lenient && info. errors != 0 ) ) {
466+ if (U_FAILURE (status)) {
473467 len = -1 ;
474468 buf->SetLength (0 );
475469 } else {
@@ -485,8 +479,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
485479 size_t length,
486480 bool lenient) {
487481 UErrorCode status = U_ZERO_ERROR;
488- uint32_t options = UIDNA_DEFAULT;
489- options |= UIDNA_NONTRANSITIONAL_TO_ASCII;
482+ uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI;
490483 UIDNA* uidna = uidna_openUTS46 (options, &status);
491484 if (U_FAILURE (status))
492485 return -1 ;
@@ -518,6 +511,21 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
518511 info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
519512 info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
520513
514+ // These error conditions are mandated unconditionally by UTS #46 version
515+ // 9.0.0 (rev. 17), but were found to be incompatible with actual domain
516+ // names in the wild. As such, in the current UTS #46 draft (rev. 18) these
517+ // checks are made optional depending on the CheckHyphens flag, which will be
518+ // disabled in WHATWG URL's "domain to ASCII" algorithm soon.
519+ // Refs:
520+ // - https://github.com/whatwg/url/issues/53
521+ // - https://github.com/whatwg/url/pull/309
522+ // - http://www.unicode.org/review/pri317/
523+ // - http://www.unicode.org/reports/tr46/tr46-18.html
524+ // - https://www.icann.org/news/announcement-2000-01-07-en
525+ info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
526+ info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
527+ info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
528+
521529 if (U_FAILURE (status) || (!lenient && info.errors != 0 )) {
522530 len = -1 ;
523531 buf->SetLength (0 );
@@ -534,11 +542,9 @@ static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
534542 CHECK_GE (args.Length (), 1 );
535543 CHECK (args[0 ]->IsString ());
536544 Utf8Value val (env->isolate (), args[0 ]);
537- // optional arg
538- bool lenient = args[1 ]->BooleanValue (env->context ()).FromJust ();
539545
540546 MaybeStackBuffer<char > buf;
541- int32_t len = ToUnicode (&buf, *val, val.length (), lenient );
547+ int32_t len = ToUnicode (&buf, *val, val.length ());
542548
543549 if (len < 0 ) {
544550 return env->ThrowError (" Cannot convert name to Unicode" );
0 commit comments