Skip to content

Commit 9200f38

Browse files
committed
Address comments.
1 parent 40a9735 commit 9200f38

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

python/pyspark/sql/session.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,13 +459,17 @@ def _convert_from_pandas(self, pdf, schema, timezone):
459459
if isinstance(field.dataType, TimestampType):
460460
s = _check_series_convert_timestamps_tz_local(pdf[field.name], timezone)
461461
if not copied and s is not pdf[field.name]:
462+
# Copy once if the series is modified to prevent the original Pandas
463+
# DataFrame from being updated
462464
pdf = pdf.copy()
463465
copied = True
464466
pdf[field.name] = s
465467
else:
466468
for column, series in pdf.iteritems():
467469
s = _check_series_convert_timestamps_tz_local(pdf[column], timezone)
468470
if not copied and s is not pdf[column]:
471+
# Copy once if the series is modified to prevent the original Pandas
472+
# DataFrame from being updated
469473
pdf = pdf.copy()
470474
copied = True
471475
pdf[column] = s

python/pyspark/sql/tests.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3291,6 +3291,7 @@ def test_createDataFrame_respect_session_timezone(self):
32913291

32923292
self.assertNotEqual(result_ny, result_la)
32933293

3294+
# Correct result_la by adjusting 3 hours difference between Los Angeles and New York
32943295
result_la_corrected = [Row(**{k: v - timedelta(hours=3) if k == '7_timestamp_t' else v
32953296
for k, v in row.asDict().items()})
32963297
for row in result_la]
@@ -3834,6 +3835,7 @@ def test_vectorized_udf_timestamps_respect_session_timezone(self):
38343835
df_la = df.withColumn("tscopy", f_timestamp_copy(col("timestamp"))) \
38353836
.withColumn("internal_value", internal_value(col("timestamp")))
38363837
result_la = df_la.select(col("idx"), col("internal_value")).collect()
3838+
# Correct result_la by adjusting 3 hours difference between Los Angeles and New York
38373839
diff = 3 * 60 * 60 * 1000 * 1000 * 1000
38383840
result_la_corrected = \
38393841
df_la.select(col("idx"), col("tscopy"), col("internal_value") + diff).collect()

python/pyspark/sql/types.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,28 +1729,28 @@ def _check_series_convert_timestamps_internal(s, timezone):
17291729
return s
17301730

17311731

1732-
def _check_series_convert_timestamps_localize(s, fromTimezone, toTimezone):
1732+
def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone):
17331733
"""
17341734
Convert timestamp to timezone-naive in the specified timezone or local timezone
17351735
17361736
:param s: a pandas.Series
1737-
:param fromTimezone: the timezone to convert from. if None then use local timezone
1738-
:param toTimezone: the timezone to convert to. if None then use local timezone
1737+
:param from_timezone: the timezone to convert from. if None then use local timezone
1738+
:param to_timezone: the timezone to convert to. if None then use local timezone
17391739
:return pandas.Series where if it is a timestamp, has been converted to tz-naive
17401740
"""
17411741
try:
17421742
import pandas as pd
17431743
from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
17441744
except ImportError as e:
17451745
raise ImportError(_old_pandas_exception_message(e))
1746-
fromTz = fromTimezone or 'tzlocal()'
1747-
toTz = toTimezone or 'tzlocal()'
1746+
from_tz = from_timezone or 'tzlocal()'
1747+
to_tz = to_timezone or 'tzlocal()'
17481748
# TODO: handle nested timestamps, such as ArrayType(TimestampType())?
17491749
if is_datetime64tz_dtype(s.dtype):
1750-
return s.dt.tz_convert(toTz).dt.tz_localize(None)
1751-
elif is_datetime64_dtype(s.dtype) and fromTz != toTz:
1750+
return s.dt.tz_convert(to_tz).dt.tz_localize(None)
1751+
elif is_datetime64_dtype(s.dtype) and from_tz != to_tz:
17521752
# `s.dt.tz_localize('tzlocal()')` doesn't work properly when including NaT.
1753-
return s.apply(lambda ts: ts.tz_localize(fromTz).tz_convert(toTz).tz_localize(None)
1753+
return s.apply(lambda ts: ts.tz_localize(from_tz).tz_convert(to_tz).tz_localize(None)
17541754
if ts is not pd.NaT else pd.NaT)
17551755
else:
17561756
return s

0 commit comments

Comments
 (0)