Skip to content

Commit ca4e960

Browse files
aokolnychyi authored and ueshin committed
[SPARK-17914][SQL] Fix parsing of timestamp strings with nanoseconds
The PR contains a tiny change to fix the way Spark parses string literals into timestamps. Currently, some timestamps that contain nanoseconds are corrupted during the conversion from internal UTF8Strings into the internal representation of timestamps. Consider the following example: ``` spark.sql("SELECT cast('2015-01-02 00:00:00.000000001' as TIMESTAMP)").show(false) +------------------------------------------------+ |CAST(2015-01-02 00:00:00.000000001 AS TIMESTAMP)| +------------------------------------------------+ |2015-01-02 00:00:00.000001 | +------------------------------------------------+ ``` The fix was tested with existing tests. Also, there is a new test to cover cases that did not work previously. Author: aokolnychyi <[email protected]> Closes #18252 from aokolnychyi/spark-17914.
1 parent 22dd65f commit ca4e960

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ import org.apache.spark.unsafe.types.UTF8String
3232
* Helper functions for converting between internal and external date and time representations.
3333
* Dates are exposed externally as java.sql.Date and are represented internally as the number of
3434
* dates since the Unix epoch (1970-01-01). Timestamps are exposed externally as java.sql.Timestamp
35-
* and are stored internally as longs, which are capable of storing timestamps with 100 nanosecond
35+
* and are stored internally as longs, which are capable of storing timestamps with microsecond
3636
* precision.
3737
*/
3838
object DateTimeUtils {
@@ -399,13 +399,14 @@ object DateTimeUtils {
399399
digitsMilli += 1
400400
}
401401

402-
if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) {
403-
return None
402+
// We are truncating the nanosecond part, which results in loss of precision
403+
while (digitsMilli > 6) {
404+
segments(6) /= 10
405+
digitsMilli -= 1
404406
}
405407

406-
// Instead of return None, we truncate the fractional seconds to prevent inserting NULL
407-
if (segments(6) > 999999) {
408-
segments(6) = segments(6).toString.take(6).toInt
408+
if (!justTime && isInvalidDate(segments(0), segments(1), segments(2))) {
409+
return None
409410
}
410411

411412
if (segments(3) < 0 || segments(3) > 23 || segments(4) < 0 || segments(4) > 59 ||

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,22 @@ class DateTimeUtilsSuite extends SparkFunSuite {
3434
((timestamp + tz.getOffset(timestamp)) / MILLIS_PER_DAY).toInt
3535
}
3636

37+
test("nanoseconds truncation") {
38+
def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) {
39+
val parsedTimestampOp = DateTimeUtils.stringToTimestamp(UTF8String.fromString(originalTime))
40+
assert(parsedTimestampOp.isDefined, "timestamp with nanoseconds was not parsed correctly")
41+
assert(DateTimeUtils.timestampToString(parsedTimestampOp.get) === expectedParsedTime)
42+
}
43+
44+
checkStringToTimestamp("2015-01-02 00:00:00.123456789", "2015-01-02 00:00:00.123456")
45+
checkStringToTimestamp("2015-01-02 00:00:00.100000009", "2015-01-02 00:00:00.1")
46+
checkStringToTimestamp("2015-01-02 00:00:00.000050000", "2015-01-02 00:00:00.00005")
47+
checkStringToTimestamp("2015-01-02 00:00:00.12005", "2015-01-02 00:00:00.12005")
48+
checkStringToTimestamp("2015-01-02 00:00:00.100", "2015-01-02 00:00:00.1")
49+
checkStringToTimestamp("2015-01-02 00:00:00.000456789", "2015-01-02 00:00:00.000456")
50+
checkStringToTimestamp("1950-01-02 00:00:00.000456789", "1950-01-02 00:00:00.000456")
51+
}
52+
3753
test("timestamp and us") {
3854
val now = new Timestamp(System.currentTimeMillis())
3955
now.setNanos(1000)

0 commit comments

Comments (0)