Skip to content

Commit dd2d9ff

Browse files
committed
Minor fixes
1 parent d54e500 commit dd2d9ff

File tree

3 files changed

+13
-14
lines changed

3 files changed

+13
-14
lines changed

src/Functions/idna.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ namespace ErrorCodes
3030
/// - idnaEncode(), tryIdnaEncode() and idnaDecode(), see https://en.wikipedia.org/wiki/Internationalized_domain_name#ToASCII_and_ToUnicode
3131
/// and [3] https://www.unicode.org/reports/tr46/#ToUnicode
3232

33-
3433
enum class ErrorHandling
3534
{
3635
Throw, /// Throw exception
@@ -71,7 +70,7 @@ struct IdnaEncode
7170
{
7271
if constexpr (error_handling == ErrorHandling::Throw)
7372
{
74-
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to Punycode", std::string_view(value, value_length));
73+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'{}' cannot be converted to ASCII", value_view);
7574
}
7675
else
7776
{
@@ -96,6 +95,7 @@ struct IdnaEncode
9695
}
9796
};
9897

98+
/// Translates an ASCII-encoded IDNA string back to its UTF-8 representation.
9999
struct IdnaDecode
100100
{
101101
/// As per the specification, invalid inputs are returned as is, i.e. there is no special error handling.
@@ -113,11 +113,11 @@ struct IdnaDecode
113113
std::string unicode;
114114
for (size_t row = 0; row < rows; ++row)
115115
{
116-
const char * value = reinterpret_cast<const char *>(&data[prev_offset]);
117-
const size_t value_length = offsets[row] - prev_offset - 1;
118-
std::string_view value_view(value, value_length);
116+
const char * ascii = reinterpret_cast<const char *>(&data[prev_offset]);
117+
const size_t ascii_length = offsets[row] - prev_offset - 1;
118+
std::string_view ascii_view(ascii, ascii_length);
119119

120-
unicode = ada::idna::to_unicode(value_view);
120+
unicode = ada::idna::to_unicode(ascii_view);
121121

122122
res_data.insert(unicode.c_str(), unicode.c_str() + unicode.size() + 1);
123123
res_offsets.push_back(res_data.size());
@@ -149,7 +149,7 @@ REGISTER_FUNCTION(Idna)
149149
Computes an ASCII representation of an Internationalized Domain Name. Throws an exception in case of error.)",
150150
.syntax="idnaEncode(str)",
151151
.arguments={{"str", "Input string"}},
152-
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
152+
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
153153
.examples={
154154
{"simple",
155155
"SELECT idnaEncode('straße.münchen.de') AS ascii;",
@@ -166,7 +166,7 @@ Computes an ASCII representation of an Internationalized Domain Name. Throws an
166166
Computes a ASCII representation of an Internationalized Domain Name. Returns an empty string in case of error)",
167167
.syntax="punycodeEncode(str)",
168168
.arguments={{"str", "Input string"}},
169-
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
169+
.returned_value="An ASCII-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",
170170
.examples={
171171
{"simple",
172172
"SELECT idnaEncodeOrNull('München') AS ascii;",
@@ -180,7 +180,7 @@ Computes a ASCII representation of an Internationalized Domain Name. Returns an
180180

181181
factory.registerFunction<FunctionIdnaDecode>(FunctionDocumentation{
182182
.description=R"(
183-
Computes a Unicode representation of an Internationalized Domain Name.)",
183+
Computes the Unicode representation of ASCII-encoded Internationalized Domain Name.)",
184184
.syntax="idnaDecode(str)",
185185
.arguments={{"str", "Input string"}},
186186
.returned_value="An Unicode-encoded domain name [String](/docs/en/sql-reference/data-types/string.md).",

src/Functions/punycode.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ enum class ErrorHandling
3737

3838
struct PunycodeEncode
3939
{
40-
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, returnn undefined output, i.e. garbage-in, garbage-out.
40+
/// Encoding-as-punycode can only fail if the input isn't valid UTF8. In that case, return undefined output, i.e. garbage-in, garbage-out.
4141
static void vector(
4242
const ColumnString::Chars & data,
4343
const ColumnString::Offsets & offsets,
@@ -60,7 +60,7 @@ struct PunycodeEncode
6060
value_utf32.resize(value_utf32_length);
6161
const size_t codepoints = ada::idna::utf8_to_utf32(value, value_length, value_utf32.data());
6262
if (codepoints == 0)
63-
value_utf32.clear(); /// input was empty or it is not valid UTF-8
63+
value_utf32.clear(); /// input was empty or not valid UTF-8
6464

6565
const bool ok = ada::idna::utf32_to_punycode(value_utf32, value_puny);
6666
if (!ok)

utils/check-style/aspell-ignore/en/aspell-dict.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,7 +1657,6 @@ icudata
16571657
idempotency
16581658
idnaDecode
16591659
idnaEncode
1660-
idnaEncodeOrNull
16611660
ifNotFinite
16621661
ifNull
16631662
iframe
@@ -2083,9 +2082,7 @@ pseudorandomize
20832082
psql
20842083
ptrs
20852084
punycodeDecode
2086-
punycodeDecodeOrNull
20872085
punycodeEncode
2088-
punycodeEncodeOrNull
20892086
pushdown
20902087
pwrite
20912088
py
@@ -2532,6 +2529,8 @@ trimRight
25322529
trunc
25332530
tryBase
25342531
tryDecrypt
2532+
tryIdnaEncode
2533+
tryPunycodeDecode
25352534
tskv
25362535
tsv
25372536
tui

0 commit comments

Comments
 (0)