Skip to content

Commit 0977bb6

Browse files
anonrig authored and marco-ippolito committed
src: remove icu based ToASCII and ToUnicode
PR-URL: #55156 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Matthew Aitken <[email protected]> Reviewed-By: Daniel Lemire <[email protected]> Reviewed-By: Richard Lau <[email protected]>
1 parent 43f7050 commit 0977bb6

File tree

4 files changed

+2
-389
lines changed

4 files changed

+2
-389
lines changed

src/node_i18n.cc

+2-170
Original file line number | Diff line number | Diff line change
@@ -60,18 +60,16 @@
6060
#include <unicode/uchar.h>
6161
#include <unicode/uclean.h>
6262
#include <unicode/ucnv.h>
63-
#include <unicode/udata.h>
64-
#include <unicode/uidna.h>
6563
#include <unicode/ulocdata.h>
6664
#include <unicode/urename.h>
67-
#include <unicode/ustring.h>
6865
#include <unicode/utf16.h>
69-
#include <unicode/utf8.h>
7066
#include <unicode/utypes.h>
7167
#include <unicode/uvernum.h>
7268
#include <unicode/uversion.h>
7369

7470
#ifdef NODE_HAVE_SMALL_ICU
71+
#include <unicode/udata.h>
72+
7573
/* if this is defined, we have a 'secondary' entry point.
7674
compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
7775
#define SMALL_ICUDATA_ENTRY_POINT \
@@ -95,7 +93,6 @@ using v8::Int32;
9593
using v8::Isolate;
9694
using v8::Local;
9795
using v8::MaybeLocal;
98-
using v8::NewStringType;
9996
using v8::Object;
10097
using v8::ObjectTemplate;
10198
using v8::String;
@@ -582,167 +579,6 @@ void SetDefaultTimeZone(const char* tzid) {
582579
CHECK(U_SUCCESS(status));
583580
}
584581

585-
// Converts the domain name in `input` (`length` bytes) with ICU's
// uidna_nameToUnicodeUTF8, using a UTS #46 instance opened with
// UIDNA_NONTRANSITIONAL_TO_UNICODE, and writes the result into `buf`.
//
// Returns the length reported by ICU and sets `buf`'s length to it. Returns -1
// if the IDNA instance cannot be opened, or (after setting `buf`'s length to 0)
// if ICU reports a failure status for the conversion.
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
586-
const char* input,
587-
size_t length) {
588-
UErrorCode status = U_ZERO_ERROR;
589-
uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
590-
UIDNA* uidna = uidna_openUTS46(options, &status);
591-
if (U_FAILURE(status))
592-
return -1;
593-
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
594-
// First attempt: convert into whatever capacity `buf` currently has.
595-
int32_t len = uidna_nameToUnicodeUTF8(uidna,
596-
input, length,
597-
**buf, buf->capacity(),
598-
&info,
599-
&status);
600-
601-
// Do not check info.errors like we do with ToASCII since ToUnicode always
602-
// returns a string, despite any possible errors that may have occurred.
603-
// On overflow, reset the status, grow `buf` to the length returned by the
// first call, and retry the conversion once.
604-
if (status == U_BUFFER_OVERFLOW_ERROR) {
605-
status = U_ZERO_ERROR;
606-
buf->AllocateSufficientStorage(len);
607-
len = uidna_nameToUnicodeUTF8(uidna,
608-
input, length,
609-
**buf, buf->capacity(),
610-
&info,
611-
&status);
612-
}
613-
614-
// info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
615-
// string, regardless of whether an error occurred.
616-
// Only the ICU status decides between the error result and the converted
// length.
617-
if (U_FAILURE(status)) {
618-
len = -1;
619-
buf->SetLength(0);
620-
} else {
621-
buf->SetLength(len);
622-
}
623-
// Release the IDNA instance opened above before returning.
624-
uidna_close(uidna);
625-
return len;
626-
}
627-
628-
// Converts the domain name in `input` (`length` bytes) with ICU's
// uidna_nameToASCII_UTF8 and writes the result into `buf`.
//
// `mode` selects how strict the conversion is:
//   - idna_mode::kStrict additionally enables UIDNA_USE_STD3_RULES and keeps
//     the empty-label / label-too-long / domain-too-long errors.
//   - idna_mode::kLenient ignores every bit left in info.errors; only a
//     failing ICU status is treated as an error.
//   - any other mode fails on whatever bits remain in info.errors after the
//     hyphen and length errors have been masked out.
//
// Returns the length reported by ICU and sets `buf`'s length to it. Returns -1
// if the IDNA instance cannot be opened, or (after setting `buf`'s length to 0)
// if the conversion is rejected under the rules above.
int32_t ToASCII(MaybeStackBuffer<char>* buf,
629-
const char* input,
630-
size_t length,
631-
idna_mode mode) {
632-
UErrorCode status = U_ZERO_ERROR;
633-
uint32_t options = // CheckHyphens = false; handled later
634-
UIDNA_CHECK_BIDI | // CheckBidi = true
635-
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
636-
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
637-
if (mode == idna_mode::kStrict) {
638-
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
639-
// VerifyDnsLength = beStrict;
640-
// handled later
641-
}
642-
643-
UIDNA* uidna = uidna_openUTS46(options, &status);
644-
if (U_FAILURE(status))
645-
return -1;
646-
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
647-
// First attempt: convert into whatever capacity `buf` currently has.
648-
int32_t len = uidna_nameToASCII_UTF8(uidna,
649-
input, length,
650-
**buf, buf->capacity(),
651-
&info,
652-
&status);
653-
// On overflow, reset the status, grow `buf` to the length returned by the
// first call, and retry the conversion once.
654-
if (status == U_BUFFER_OVERFLOW_ERROR) {
655-
status = U_ZERO_ERROR;
656-
buf->AllocateSufficientStorage(len);
657-
len = uidna_nameToASCII_UTF8(uidna,
658-
input, length,
659-
**buf, buf->capacity(),
660-
&info,
661-
&status);
662-
}
663-
664-
// In UTS #46 which specifies ToASCII, certain error conditions are
665-
// configurable through options, and the WHATWG URL Standard promptly elects
666-
// to disable some of them to accommodate for real-world use cases.
667-
// Unfortunately, ICU4C's IDNA module does not support disabling some of
668-
// these options through `options` above, and thus continues throwing
669-
// unnecessary errors. To counter this situation, we just filter out the
670-
// errors that may have happened afterwards, before deciding whether to
671-
// return an error from this function.
672-
673-
// CheckHyphens = false
674-
// (Specified in the current UTS #46 draft rev. 18.)
675-
// Refs:
676-
// - https://github.com/whatwg/url/issues/53
677-
// - https://github.com/whatwg/url/pull/309
678-
// - http://www.unicode.org/review/pri317/
679-
// - http://www.unicode.org/reports/tr46/tr46-18.html
680-
// - https://www.icann.org/news/announcement-2000-01-07-en
681-
info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
682-
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
683-
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
684-
// Length-related errors are only kept in strict mode; every other mode
// clears them here.
685-
if (mode != idna_mode::kStrict) {
686-
// VerifyDnsLength = beStrict
687-
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
688-
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
689-
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
690-
}
691-
// Fail on a bad ICU status, or on any error bit that survived the masking
// above unless the caller asked for lenient mode.
692-
if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) {
693-
len = -1;
694-
buf->SetLength(0);
695-
} else {
696-
buf->SetLength(len);
697-
}
698-
// Release the IDNA instance opened above before returning.
699-
uidna_close(uidna);
700-
return len;
701-
}
702-
703-
// V8 function callback wrapping the buffer-based ToUnicode() overload.
//
// Expects at least one argument and requires args[0] to be a string (both
// enforced with CHECK macros). The string is read as UTF-8, converted, and the
// result is set as the return value as a new V8 string. If the conversion
// reports a negative length, ERR_INVALID_ARG_VALUE is thrown instead.
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
704-
Environment* env = Environment::GetCurrent(args);
705-
CHECK_GE(args.Length(), 1);
706-
CHECK(args[0]->IsString());
707-
Utf8Value val(env->isolate(), args[0]);
708-
709-
MaybeStackBuffer<char> buf;
710-
int32_t len = ToUnicode(&buf, *val, val.length());
711-
// A negative length is the error signal from the buffer-based overload.
712-
if (len < 0) {
713-
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
714-
}
715-
// NOTE(review): ToLocalChecked() presumes NewFromUtf8 cannot fail for this
// output — confirm for very long inputs.
716-
args.GetReturnValue().Set(
717-
String::NewFromUtf8(env->isolate(),
718-
*buf,
719-
NewStringType::kNormal,
720-
len).ToLocalChecked());
721-
}
722-
723-
// V8 function callback wrapping the buffer-based ToASCII() overload.
//
// Expects at least one argument and requires args[0] to be a string (both
// enforced with CHECK macros). args[1] is coerced to a boolean: a truthy value
// selects idna_mode::kLenient, anything else idna_mode::kDefault. The result
// is set as the return value as a new V8 string. If the conversion reports a
// negative length, ERR_INVALID_ARG_VALUE is thrown instead.
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
724-
Environment* env = Environment::GetCurrent(args);
725-
CHECK_GE(args.Length(), 1);
726-
CHECK(args[0]->IsString());
727-
Utf8Value val(env->isolate(), args[0]);
728-
// optional arg
729-
bool lenient = args[1]->BooleanValue(env->isolate());
730-
idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault;
731-
732-
MaybeStackBuffer<char> buf;
733-
int32_t len = ToASCII(&buf, *val, val.length(), mode);
734-
// A negative length is the error signal from the buffer-based overload.
735-
if (len < 0) {
736-
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
737-
}
738-
// NOTE(review): ToLocalChecked() presumes NewFromUtf8 cannot fail for this
// output — confirm for very long inputs.
739-
args.GetReturnValue().Set(
740-
String::NewFromUtf8(env->isolate(),
741-
*buf,
742-
NewStringType::kNormal,
743-
len).ToLocalChecked());
744-
}
745-
746582
// This is similar to wcwidth except that it takes the current unicode
747583
// character properties database into consideration, allowing it to
748584
// correctly calculate the column widths of things like emoji's and
@@ -849,8 +685,6 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
849685
Local<ObjectTemplate> target) {
850686
Isolate* isolate = isolate_data->isolate();
851687

852-
SetMethod(isolate, target, "toUnicode", ToUnicode);
853-
SetMethod(isolate, target, "toASCII", ToASCII);
854688
SetMethod(isolate, target, "getStringWidth", GetStringWidth);
855689

856690
// One-shot converters
@@ -879,8 +713,6 @@ void CreatePerContextProperties(Local<Object> target,
879713
void* priv) {}
880714

881715
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
882-
registry->Register(ToUnicode);
883-
registry->Register(ToASCII);
884716
registry->Register(GetStringWidth);
885717
registry->Register(ICUErrorName);
886718
registry->Register(Transcode);

src/node_i18n.h

-13
Original file line number | Diff line number | Diff line change
@@ -53,19 +53,6 @@ enum class idna_mode {
5353
kStrict
5454
};
5555

56-
// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
57-
// https://url.spec.whatwg.org/#concept-domain-to-ascii
58-
int32_t ToASCII(MaybeStackBuffer<char>* buf,
59-
const char* input,
60-
size_t length,
61-
idna_mode mode = idna_mode::kDefault);
62-
63-
// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
64-
// https://url.spec.whatwg.org/#concept-domain-to-unicode
65-
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
66-
const char* input,
67-
size_t length);
68-
6956
// Deleter functor for UConverter handles: calling it closes the converter
// with ucnv_close().
// NOTE(review): presumably used as the deleter of a smart pointer that owns a
// UConverter — the usage is not visible in this excerpt; confirm.
struct ConverterDeleter {
7057
void operator()(UConverter* pointer) const { ucnv_close(pointer); }
7158
};

test/fixtures/icu-punycode-toascii.json

-149
This file was deleted.

0 commit comments

Comments
 (0)