Skip to content

Commit 227e054

Browse files
authored
Feat(oracle): support unicode strings u'...' (#3641)
1 parent 47472d9 commit 227e054

File tree

4 files changed, with 25 additions and 6 deletions.

sqlglot/dialects/dialect.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,7 @@ def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[
169169

170170
if enum not in ("", "athena", "presto", "trino"):
171171
klass.generator_class.TRY_SUPPORTED = False
172+
klass.generator_class.SUPPORTS_UESCAPE = False
172173

173174
if enum not in ("", "databricks", "hive", "spark", "spark2"):
174175
modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()

sqlglot/dialects/oracle.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,12 @@ class Oracle(Dialect):
231231
class Tokenizer(tokens.Tokenizer):
232232
VAR_SINGLE_TOKENS = {"@", "$", "#"}
233233

234+
UNICODE_STRINGS = [
235+
(prefix + q, q)
236+
for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
237+
for prefix in ("U", "u")
238+
]
239+
234240
KEYWORDS = {
235241
**tokens.Tokenizer.KEYWORDS,
236242
"(+)": TokenType.JOIN_MARKER,

sqlglot/generator.py

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -356,6 +356,9 @@ class Generator(metaclass=_Generator):
356356
# Whether the conditional TRY(expression) function is supported
357357
TRY_SUPPORTED = True
358358

359+
# Whether the UESCAPE syntax in unicode strings is supported
360+
SUPPORTS_UESCAPE = True
361+
359362
# The keyword to use when generating a star projection with excluded columns
360363
STAR_EXCEPT = "EXCEPT"
361364

@@ -1143,16 +1146,23 @@ def unicodestring_sql(self, expression: exp.UnicodeString) -> str:
11431146
escape = expression.args.get("escape")
11441147

11451148
if self.dialect.UNICODE_START:
1146-
escape = f" UESCAPE {self.sql(escape)}" if escape else ""
1147-
return f"{self.dialect.UNICODE_START}{this}{self.dialect.UNICODE_END}{escape}"
1149+
escape_substitute = r"\\\1"
1150+
left_quote, right_quote = self.dialect.UNICODE_START, self.dialect.UNICODE_END
1151+
else:
1152+
escape_substitute = r"\\u\1"
1153+
left_quote, right_quote = self.dialect.QUOTE_START, self.dialect.QUOTE_END
11481154

11491155
if escape:
1150-
pattern = re.compile(rf"{escape.name}(\d+)")
1156+
escape_pattern = re.compile(rf"{escape.name}(\d+)")
1157+
escape_sql = f" UESCAPE {self.sql(escape)}" if self.SUPPORTS_UESCAPE else ""
11511158
else:
1152-
pattern = ESCAPED_UNICODE_RE
1159+
escape_pattern = ESCAPED_UNICODE_RE
1160+
escape_sql = ""
1161+
1162+
if not self.dialect.UNICODE_START or (escape and not self.SUPPORTS_UESCAPE):
1163+
this = escape_pattern.sub(escape_substitute, this)
11531164

1154-
this = pattern.sub(r"\\u\1", this)
1155-
return f"{self.dialect.QUOTE_START}{this}{self.dialect.QUOTE_END}"
1165+
return f"{left_quote}{this}{right_quote}{escape_sql}"
11561166

11571167
def rawstring_sql(self, expression: exp.RawString) -> str:
11581168
string = self.escape_str(expression.this.replace("\\", "\\\\"), escape_backslash=False)

tests/dialects/test_presto.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -564,6 +564,7 @@ def test_unicode_string(self):
564564
self.validate_all(
565565
f"{prefix}'Hello winter \\2603 !'",
566566
write={
567+
"oracle": "U'Hello winter \\2603 !'",
567568
"presto": "U&'Hello winter \\2603 !'",
568569
"snowflake": "'Hello winter \\u2603 !'",
569570
"spark": "'Hello winter \\u2603 !'",
@@ -572,6 +573,7 @@ def test_unicode_string(self):
572573
self.validate_all(
573574
f"{prefix}'Hello winter #2603 !' UESCAPE '#'",
574575
write={
576+
"oracle": "U'Hello winter \\2603 !'",
575577
"presto": "U&'Hello winter #2603 !' UESCAPE '#'",
576578
"snowflake": "'Hello winter \\u2603 !'",
577579
"spark": "'Hello winter \\u2603 !'",

0 commit comments

Comments
 (0)