Skip to content

Commit a290814

Browse files
yjshen authored and rxin committed
[SPARK-8866][SQL] use 1us precision for timestamp type
JIRA: https://issues.apache.org/jira/browse/SPARK-8866 Author: Yijie Shen <[email protected]> Closes #7283 from yijieshen/micro_timestamp and squashes the following commits: dc735df [Yijie Shen] update CastSuite to avoid round error 714eaea [Yijie Shen] add timestamp_udf into blacklist due to precision lose c3ca2f4 [Yijie Shen] fix unhandled case in CurrentTimestamp 8d4aa6b [Yijie Shen] use 1us precision for timestamp type
1 parent 28fa01e commit a290814

File tree

11 files changed

+50
-50
lines changed

11 files changed

+50
-50
lines changed

python/pyspark/sql/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ def to_posix_timstamp(dt):
775775
if dt:
776776
seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo
777777
else time.mktime(dt.timetuple()))
778-
return int(seconds * 1e7 + dt.microsecond * 10)
778+
return int(seconds * 1e6 + dt.microsecond)
779779
return to_posix_timstamp
780780

781781
else:

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
186186
case ByteType =>
187187
buildCast[Byte](_, b => longToTimestamp(b.toLong))
188188
case DateType =>
189-
buildCast[Int](_, d => DateTimeUtils.daysToMillis(d) * 10000)
189+
buildCast[Int](_, d => DateTimeUtils.daysToMillis(d) * 1000)
190190
// TimestampWritable.decimalToTimestamp
191191
case DecimalType() =>
192192
buildCast[Decimal](_, d => decimalToTimestamp(d))
@@ -207,16 +207,16 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
207207
}
208208

209209
private[this] def decimalToTimestamp(d: Decimal): Long = {
210-
(d.toBigDecimal * 10000000L).longValue()
210+
(d.toBigDecimal * 1000000L).longValue()
211211
}
212212

213-
// converting milliseconds to 100ns
214-
private[this] def longToTimestamp(t: Long): Long = t * 10000L
215-
// converting 100ns to seconds
216-
private[this] def timestampToLong(ts: Long): Long = math.floor(ts.toDouble / 10000000L).toLong
217-
// converting 100ns to seconds in double
213+
// converting milliseconds to us
214+
private[this] def longToTimestamp(t: Long): Long = t * 1000L
215+
// converting us to seconds
216+
private[this] def timestampToLong(ts: Long): Long = math.floor(ts.toDouble / 1000000L).toLong
217+
// converting us to seconds in double
218218
private[this] def timestampToDouble(ts: Long): Double = {
219-
ts / 10000000.0
219+
ts / 1000000.0
220220
}
221221

222222
// DateConverter
@@ -229,7 +229,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
229229
case TimestampType =>
230230
// throw valid precision more than seconds, according to Hive.
231231
// Timestamp.nanos is in 0 to 999,999,999, no more than a second.
232-
buildCast[Long](_, t => DateTimeUtils.millisToDays(t / 10000L))
232+
buildCast[Long](_, t => DateTimeUtils.millisToDays(t / 1000L))
233233
// Hive throws this exception as a Semantic Exception
234234
// It is never possible to compare result when hive return with exception,
235235
// so we can return null

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,6 @@ case class CurrentTimestamp() extends LeafExpression {
5151
override def dataType: DataType = TimestampType
5252

5353
override def eval(input: InternalRow): Any = {
54-
System.currentTimeMillis() * 10000L
54+
System.currentTimeMillis() * 1000L
5555
}
5656
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ object DateTimeUtils {
3434
// see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian
3535
final val JULIAN_DAY_OF_EPOCH = 2440587 // and .5
3636
final val SECONDS_PER_DAY = 60 * 60 * 24L
37-
final val HUNDRED_NANOS_PER_SECOND = 1000L * 1000L * 10L
38-
final val NANOS_PER_SECOND = HUNDRED_NANOS_PER_SECOND * 100
37+
final val MICROS_PER_SECOND = 1000L * 1000L
38+
final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L
3939

4040

4141
// Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
@@ -77,8 +77,8 @@ object DateTimeUtils {
7777
threadLocalDateFormat.get.format(toJavaDate(days))
7878

7979
// Converts Timestamp to string according to Hive TimestampWritable convention.
80-
def timestampToString(num100ns: Long): String = {
81-
val ts = toJavaTimestamp(num100ns)
80+
def timestampToString(us: Long): String = {
81+
val ts = toJavaTimestamp(us)
8282
val timestampString = ts.toString
8383
val formatted = threadLocalTimestampFormat.get.format(ts)
8484

@@ -132,52 +132,52 @@ object DateTimeUtils {
132132
}
133133

134134
/**
135-
* Returns a java.sql.Timestamp from number of 100ns since epoch.
135+
* Returns a java.sql.Timestamp from number of micros since epoch.
136136
*/
137-
def toJavaTimestamp(num100ns: Long): Timestamp = {
137+
def toJavaTimestamp(us: Long): Timestamp = {
138138
// setNanos() will overwrite the millisecond part, so the milliseconds should be
139139
// cut off at seconds
140-
var seconds = num100ns / HUNDRED_NANOS_PER_SECOND
141-
var nanos = num100ns % HUNDRED_NANOS_PER_SECOND
140+
var seconds = us / MICROS_PER_SECOND
141+
var micros = us % MICROS_PER_SECOND
142142
// setNanos() can not accept negative value
143-
if (nanos < 0) {
144-
nanos += HUNDRED_NANOS_PER_SECOND
143+
if (micros < 0) {
144+
micros += MICROS_PER_SECOND
145145
seconds -= 1
146146
}
147147
val t = new Timestamp(seconds * 1000)
148-
t.setNanos(nanos.toInt * 100)
148+
t.setNanos(micros.toInt * 1000)
149149
t
150150
}
151151

152152
/**
153-
* Returns the number of 100ns since epoch from java.sql.Timestamp.
153+
* Returns the number of micros since epoch from java.sql.Timestamp.
154154
*/
155155
def fromJavaTimestamp(t: Timestamp): Long = {
156156
if (t != null) {
157-
t.getTime() * 10000L + (t.getNanos().toLong / 100) % 10000L
157+
t.getTime() * 1000L + (t.getNanos().toLong / 1000) % 1000L
158158
} else {
159159
0L
160160
}
161161
}
162162

163163
/**
164-
* Returns the number of 100ns (hundred of nanoseconds) since epoch from Julian day
164+
* Returns the number of microseconds since epoch from Julian day
165165
* and nanoseconds in a day
166166
*/
167167
def fromJulianDay(day: Int, nanoseconds: Long): Long = {
168168
// use Long to avoid rounding errors
169169
val seconds = (day - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY - SECONDS_PER_DAY / 2
170-
seconds * HUNDRED_NANOS_PER_SECOND + nanoseconds / 100L
170+
seconds * MICROS_PER_SECOND + nanoseconds / 1000L
171171
}
172172

173173
/**
174-
* Returns Julian day and nanoseconds in a day from the number of 100ns (hundred of nanoseconds)
174+
* Returns Julian day and nanoseconds in a day from the number of microseconds
175175
*/
176-
def toJulianDay(num100ns: Long): (Int, Long) = {
177-
val seconds = num100ns / HUNDRED_NANOS_PER_SECOND + SECONDS_PER_DAY / 2
176+
def toJulianDay(us: Long): (Int, Long) = {
177+
val seconds = us / MICROS_PER_SECOND + SECONDS_PER_DAY / 2
178178
val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
179179
val secondsInDay = seconds % SECONDS_PER_DAY
180-
val nanos = (num100ns % HUNDRED_NANOS_PER_SECOND) * 100L
180+
val nanos = (us % MICROS_PER_SECOND) * 1000L
181181
(day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
182182
}
183183
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,15 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
293293
}
294294

295295
test("cast from timestamp") {
296-
val millis = 15 * 1000 + 2
297-
val seconds = millis * 1000 + 2
296+
val millis = 15 * 1000 + 3
297+
val seconds = millis * 1000 + 3
298298
val ts = new Timestamp(millis)
299299
val tss = new Timestamp(seconds)
300300
checkEvaluation(cast(ts, ShortType), 15.toShort)
301301
checkEvaluation(cast(ts, IntegerType), 15)
302302
checkEvaluation(cast(ts, LongType), 15.toLong)
303-
checkEvaluation(cast(ts, FloatType), 15.002f)
304-
checkEvaluation(cast(ts, DoubleType), 15.002)
303+
checkEvaluation(cast(ts, FloatType), 15.003f)
304+
checkEvaluation(cast(ts, DoubleType), 15.003)
305305
checkEvaluation(cast(cast(tss, ShortType), TimestampType), DateTimeUtils.fromJavaTimestamp(ts))
306306
checkEvaluation(cast(cast(tss, IntegerType), TimestampType),
307307
DateTimeUtils.fromJavaTimestamp(ts))
@@ -317,7 +317,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
317317
Decimal(1))
318318

319319
// A test for higher precision than millis
320-
checkEvaluation(cast(cast(0.0000001, TimestampType), DoubleType), 0.0000001)
320+
checkEvaluation(cast(cast(0.000001, TimestampType), DoubleType), 0.000001)
321321

322322
checkEvaluation(cast(Double.NaN, TimestampType), null)
323323
checkEvaluation(cast(1.0 / 0.0, TimestampType), null)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ import org.apache.spark.SparkFunSuite
2424

2525
class DateTimeUtilsSuite extends SparkFunSuite {
2626

27-
test("timestamp and 100ns") {
27+
test("timestamp and us") {
2828
val now = new Timestamp(System.currentTimeMillis())
29-
now.setNanos(100)
29+
now.setNanos(1000)
3030
val ns = DateTimeUtils.fromJavaTimestamp(now)
31-
assert(ns % 10000000L === 1)
31+
assert(ns % 1000000L === 1)
3232
assert(DateTimeUtils.toJavaTimestamp(ns) === now)
3333

3434
List(-111111111111L, -1L, 0, 1L, 111111111111L).foreach { t =>
@@ -38,7 +38,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
3838
}
3939
}
4040

41-
test("100ns and julian day") {
41+
test("us and julian day") {
4242
val (d, ns) = DateTimeUtils.toJulianDay(0)
4343
assert(d === DateTimeUtils.JULIAN_DAY_OF_EPOCH)
4444
assert(ns === DateTimeUtils.SECONDS_PER_DAY / 2 * DateTimeUtils.NANOS_PER_SECOND)

sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ private[sql] object JacksonParser {
6767
DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(parser.getText).getTime)
6868

6969
case (VALUE_STRING, TimestampType) =>
70-
DateTimeUtils.stringToTime(parser.getText).getTime * 10000L
70+
DateTimeUtils.stringToTime(parser.getText).getTime * 1000L
7171

7272
case (VALUE_NUMBER_INT, TimestampType) =>
73-
parser.getLongValue * 10000L
73+
parser.getLongValue * 1000L
7474

7575
case (_, StringType) =>
7676
val writer = new ByteArrayOutputStream()

sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,9 @@ private[sql] object JsonRDD extends Logging {
401401

402402
private def toTimestamp(value: Any): Long = {
403403
value match {
404-
case value: java.lang.Integer => value.asInstanceOf[Int].toLong * 10000L
405-
case value: java.lang.Long => value * 10000L
406-
case value: java.lang.String => DateTimeUtils.stringToTime(value).getTime * 10000L
404+
case value: java.lang.Integer => value.asInstanceOf[Int].toLong * 1000L
405+
case value: java.lang.Long => value * 1000L
406+
case value: java.lang.String => DateTimeUtils.stringToTime(value).getTime * 1000L
407407
}
408408
}
409409

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter {
326326
assert(cal.get(Calendar.HOUR) === 11)
327327
assert(cal.get(Calendar.MINUTE) === 22)
328328
assert(cal.get(Calendar.SECOND) === 33)
329-
assert(rows(0).getAs[java.sql.Timestamp](2).getNanos === 543543500)
329+
assert(rows(0).getAs[java.sql.Timestamp](2).getNanos === 543543000)
330330
}
331331

332332
test("test DATE types") {

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
254254
// the answer is sensitive for jdk version
255255
"udf_java_method",
256256

257-
// Spark SQL use Long for TimestampType, lose the precision under 100ns
257+
// Spark SQL use Long for TimestampType, lose the precision under 1us
258258
"timestamp_1",
259-
"timestamp_2"
259+
"timestamp_2",
260+
"timestamp_udf"
260261
)
261262

262263
/**
@@ -803,7 +804,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
803804
"timestamp_comparison",
804805
"timestamp_lazy",
805806
"timestamp_null",
806-
"timestamp_udf",
807807
"touch",
808808
"transform_ppr1",
809809
"transform_ppr2",

0 commit comments

Comments
 (0)