
Commit 266d0eb

[SPARK-32481] Support truncate table to move data to trash
1 parent eb74d55 commit 266d0eb

File tree (3 files changed: 31 additions, 2 deletions)
  • core/src/main/scala/org/apache/spark/util/Utils.scala
  • sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
  • sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala


core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 19 additions & 1 deletion
@@ -50,7 +50,7 @@ import com.google.common.net.InetAddresses
 import org.apache.commons.codec.binary.Hex
 import org.apache.commons.lang3.SystemUtils
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
+import org.apache.hadoop.fs.{FileSystem, FileUtil, Path, Trash}
 import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec}
 import org.apache.hadoop.security.UserGroupInformation
 import org.apache.hadoop.yarn.conf.YarnConfiguration
@@ -269,6 +269,24 @@ private[spark] object Utils extends Logging {
     file.setExecutable(true, true)
   }

+  /**
+   * Move data to trash on truncate table given
+   * spark.sql.truncate.trash.interval is positive
+   */
+  def moveToTrashIfEnabled(
+      fs: FileSystem,
+      partitionPath: Path,
+      trashInterval: Int,
+      hadoopConf: Configuration): Unit = {
+    if (trashInterval < 0) {
+      fs.delete(partitionPath, true)
+    } else {
+      logDebug(s"will move data ${partitionPath.toString} to trash")
+      hadoopConf.setInt("fs.trash.interval", trashInterval)
+      Trash.moveToAppropriateTrash(fs, partitionPath, hadoopConf)
+    }
+  }
+
   /**
    * Create a directory given the abstract pathname
    * @return true, if the directory is successfully created; otherwise, return false.
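
The helper leans on Hadoop's org.apache.hadoop.fs.Trash API (hence the new import): Trash.moveToAppropriateTrash relocates the path into the owning user's trash root, and fs.trash.interval, in minutes, controls how long Hadoop's trash checkpointing retains it. A minimal standalone sketch of that call outside Spark; the object name and demo path are illustrative, not part of this commit:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, Trash}

object TrashSketch {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    // Retention window in minutes; the Spark helper above sets this from
    // spark.sql.truncate.trash.interval before delegating to Trash.
    conf.setInt("fs.trash.interval", 1440)

    val path = new Path("/tmp/truncate-trash-demo")   // illustrative path
    val fs = path.getFileSystem(conf)
    fs.mkdirs(path)

    // Moves the directory under the owning user's trash root
    // (e.g. /user/<name>/.Trash/Current on HDFS) instead of deleting it.
    val moved = Trash.moveToAppropriateTrash(fs, path, conf)
    println(s"moved to trash: $moved")
  }
}

Setting the interval on the Hadoop Configuration right before the call, as the helper does, makes the behaviour independent of whatever fs.trash.interval the cluster's core-site.xml carries.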

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 9 additions & 0 deletions
@@ -2701,6 +2701,13 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)

+  val TRUNCATE_TRASH_INTERVAL =
+    buildConf("spark.sql.truncate.trash.interval")
+      .doc("This Configuration will decide whether move files to trash on truncate table" +
+        "If -1 files will be deleted without moving to trash")
+      .intConf
+      .createWithDefault(-1)
+
  /**
   * Holds information about keys that have been deprecated.
   *
@@ -3311,6 +3318,8 @@ class SQLConf extends Serializable with Logging {
  def optimizeNullAwareAntiJoin: Boolean =
    getConf(SQLConf.OPTIMIZE_NULL_AWARE_ANTI_JOIN)

+  def truncateTrashInterval: Int = getConf(SQLConf.TRUNCATE_TRASH_INTERVAL)
+
  /** ********************** SQLConf functionality methods ************ */

  /** Set Spark SQL configuration properties. */
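
From the user's side the new entry is an ordinary session conf with a default of -1. A hedged spark-shell sketch (the table name is made up), assuming the value is picked up via SQLConf.get when the TRUNCATE command runs:

// Route TRUNCATE TABLE data through the Hadoop trash and keep it
// recoverable for about a day; the value feeds fs.trash.interval,
// which Hadoop interprets in minutes.
spark.conf.set("spark.sql.truncate.trash.interval", "1440")
spark.sql("TRUNCATE TABLE sales.events")

// Default behaviour: -1 bypasses the trash and deletes the data immediately.
spark.conf.set("spark.sql.truncate.trash.interval", "-1")
spark.sql("TRUNCATE TABLE sales.events")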

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

Lines changed: 3 additions & 1 deletion
@@ -48,6 +48,7 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.util.SchemaUtils
+import org.apache.spark.util.Utils

 /**
  * A command to create a table with the same definition of the given existing table.
@@ -489,6 +490,7 @@ case class TruncateTableCommand(
     }
     val hadoopConf = spark.sessionState.newHadoopConf()
     val ignorePermissionAcl = SQLConf.get.truncateTableIgnorePermissionAcl
+    val trashInterval = SQLConf.get.truncateTrashInterval
     locations.foreach { location =>
       if (location.isDefined) {
         val path = new Path(location.get)
@@ -513,7 +515,7 @@ case class TruncateTableCommand(
           }
         }

-        fs.delete(path, true)
+        Utils.moveToTrashIfEnabled(fs, path, trashInterval, hadoopConf)

         // We should keep original permission/acl of the path.
         // For owner/group, only super-user can set it, for example on HDFS. Because