Commit b781ef8 (1 parent: 762366f)

use StructType in CatalogTable and remove CatalogColumn

File tree: 17 files changed, +114 -175 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala (1 addition, 8 deletions)

@@ -259,14 +259,7 @@ class SessionCatalog(
         identifier = tid,
         tableType = CatalogTableType.VIEW,
         storage = CatalogStorageFormat.empty,
-        schema = tempTables(table).output.map { c =>
-          CatalogColumn(
-            name = c.name,
-            dataType = c.dataType.catalogString,
-            nullable = c.nullable,
-            comment = Option(c.name)
-          )
-        },
+        schema = tempTables(table).output.toStructType,
         properties = Map(),
         viewText = None)
     } else {
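The removed block assembled the view schema by hand from the plan's output attributes; toStructType (an implicit extension catalyst provides on a sequence of attributes) builds the same StructType directly. A minimal sketch of what that amounts to, with the attribute names invented for illustration:

    import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
    import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

    // Two made-up output attributes, standing in for tempTables(table).output.
    val output: Seq[Attribute] = Seq(
      AttributeReference("id", LongType, nullable = false)(),
      AttributeReference("name", StringType)())

    // output.toStructType keeps each attribute's name, data type, and nullability,
    // roughly equivalent to this explicit construction:
    val schema = StructType(output.map(a => StructField(a.name, a.dataType, a.nullable)))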

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala (13 additions, 37 deletions)

@@ -18,14 +18,14 @@
 package org.apache.spark.sql.catalyst.catalog
 
 import java.util.Date
-import javax.annotation.Nullable
 
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan}
 import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.types.StructType
 
 
 /**
@@ -77,28 +77,6 @@ object CatalogStorageFormat {
     outputFormat = None, serde = None, compressed = false, properties = Map.empty)
 }
 
-/**
- * A column in a table.
- */
-case class CatalogColumn(
-    name: String,
-    // TODO: make this type-safe; this is left as a string due to issues in converting Hive
-    // varchars to and from SparkSQL strings.
-    dataType: String,
-    nullable: Boolean = true,
-    comment: Option[String] = None) {
-
-  override def toString: String = {
-    val output =
-      Seq(s"`$name`",
-        dataType,
-        if (!nullable) "NOT NULL" else "",
-        comment.map("(" + _ + ")").getOrElse(""))
-    output.filter(_.nonEmpty).mkString(" ")
-  }
-
-}
-
 /**
  * A partition (Hive style) defined in the catalog.
  *
@@ -141,7 +119,7 @@ case class CatalogTable(
     identifier: TableIdentifier,
     tableType: CatalogTableType,
     storage: CatalogStorageFormat,
-    schema: Seq[CatalogColumn],
+    schema: StructType,
     partitionColumnNames: Seq[String] = Seq.empty,
     bucketSpec: Option[BucketSpec] = None,
     owner: String = "",
@@ -163,9 +141,10 @@
   requireSubsetOfSchema(bucketSpec.map(_.sortColumnNames).getOrElse(Nil), "sort")
   requireSubsetOfSchema(bucketSpec.map(_.bucketColumnNames).getOrElse(Nil), "bucket")
 
-  /** Columns this table is partitioned by. */
-  def partitionColumns: Seq[CatalogColumn] =
-    schema.filter { c => partitionColumnNames.contains(c.name) }
+  /** schema of this table's partition columns */
+  def partitionSchema: StructType = StructType(schema.filter {
+    c => partitionColumnNames.contains(c.name)
+  })
 
   /** Return the database this table was specified to belong to, assuming it exists. */
   def database: String = identifier.database.getOrElse {
@@ -277,16 +256,13 @@ case class SimpleCatalogRelation(
   override lazy val resolved: Boolean = false
 
   override val output: Seq[Attribute] = {
-    val cols = catalogTable.schema
-      .filter { c => !catalogTable.partitionColumnNames.contains(c.name) }
-    (cols ++ catalogTable.partitionColumns).map { f =>
-      AttributeReference(
-        f.name,
-        CatalystSqlParser.parseDataType(f.dataType),
-        // Since data can be dumped in randomly with no validation, everything is nullable.
-        nullable = true
-      )(qualifier = Some(metadata.identifier.table))
-    }
+    val (partCols, dataCols) = metadata.schema.toAttributes
+      // Since data can be dumped in randomly with no validation, everything is nullable.
+      .map(_.withNullability(true).withQualifier(Some(metadata.identifier.table)))
+      .partition { a =>
+        metadata.partitionColumnNames.contains(a.name)
+      }
+    dataCols ++ partCols
   }
 
   require(
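CatalogTable's schema is now a StructType, and partitionSchema returns a StructType holding only the partition fields, replacing the old partitionColumns: Seq[CatalogColumn]. A minimal sketch of constructing and querying the reworked case class; the database, table, and column names here are invented:

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
    import org.apache.spark.sql.types.StructType

    val table = CatalogTable(
      identifier = TableIdentifier("events", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      // the partition column also appears in the full schema
      schema = new StructType()
        .add("id", "bigint")
        .add("payload", "string")
        .add("day", "string"),
      partitionColumnNames = Seq("day"))

    table.partitionSchema.fieldNames.toSeq   // Seq(day)
    table.schema.fieldNames.toSeq            // Seq(id, payload, day)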

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala (13 additions, 12 deletions)

@@ -25,6 +25,7 @@ import org.scalatest.BeforeAndAfterEach
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
 
@@ -551,7 +552,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEach {
       identifier = TableIdentifier("my_table", Some("db1")),
       tableType = CatalogTableType.MANAGED,
       storage = CatalogStorageFormat(None, None, None, None, false, Map.empty),
-      schema = Seq(CatalogColumn("a", "int"), CatalogColumn("b", "string"))
+      schema = new StructType().add("a", "int").add("b", "string")
     )
 
     catalog.createTable("db1", table, ignoreIfExists = false)
@@ -570,7 +571,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEach {
       storage = CatalogStorageFormat(
        Some(Utils.createTempDir().getAbsolutePath),
        None, None, None, false, Map.empty),
-      schema = Seq(CatalogColumn("a", "int"), CatalogColumn("b", "string"))
+      schema = new StructType().add("a", "int").add("b", "string")
     )
     catalog.createTable("db1", externalTable, ignoreIfExists = false)
     assert(!exists(db.locationUri, "external_table"))
@@ -583,11 +584,11 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEach {
       identifier = TableIdentifier("tbl", Some("db1")),
       tableType = CatalogTableType.MANAGED,
       storage = CatalogStorageFormat(None, None, None, None, false, Map.empty),
-      schema = Seq(
-        CatalogColumn("col1", "int"),
-        CatalogColumn("col2", "string"),
-        CatalogColumn("a", "int"),
-        CatalogColumn("b", "string")),
+      schema = new StructType()
+        .add("col1", "int")
+        .add("col2", "string")
+        .add("a", "int")
+        .add("b", "string"),
       partitionColumnNames = Seq("a", "b")
     )
     catalog.createTable("db1", table, ignoreIfExists = false)
@@ -686,11 +687,11 @@ abstract class CatalogTestUtils {
       identifier = TableIdentifier(name, database),
       tableType = CatalogTableType.EXTERNAL,
      storage = storageFormat,
-      schema = Seq(
-        CatalogColumn("col1", "int"),
-        CatalogColumn("col2", "string"),
-        CatalogColumn("a", "int"),
-        CatalogColumn("b", "string")),
+      schema = new StructType()
+        .add("col1", "int")
+        .add("col2", "string")
+        .add("a", "int")
+        .add("b", "string"),
       partitionColumnNames = Seq("a", "b"),
       bucketSpec = Some(BucketSpec(4, Seq("col1"), Nil)))
   }
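The tests now build schemas with StructType's add(name, dataType) overload that takes the type as a string and parses it. As a rough sketch of the equivalence (standard spark.sql.types imports assumed; nullable defaults to true in both forms):

    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

    val viaAdd = new StructType().add("a", "int").add("b", "string")
    val explicit = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType)))
    // The two should compare equal: same field names, types, nullability, and metadata.
    assert(viaAdd == explicit)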

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala (5 additions, 22 deletions)

@@ -32,7 +32,7 @@
 import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, _}
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
-import org.apache.spark.sql.types.DataType
+import org.apache.spark.sql.types.{DataType, StructType}
 
 /**
  * Concrete parser for Spark SQL statements.
@@ -928,13 +928,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       operationNotAllowed("CREATE TABLE ... CLUSTERED BY", ctx)
     }
     val comment = Option(ctx.STRING).map(string)
-    val partitionCols = Option(ctx.partitionColumns).toSeq.flatMap(visitCatalogColumns)
-    val cols = Option(ctx.columns).toSeq.flatMap(visitCatalogColumns)
+    val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil)
+    val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil)
     val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val selectQuery = Option(ctx.query).map(plan)
 
     // Ensuring whether no duplicate name is used in table definition
-    val colNames = cols.map(_.name)
+    val colNames = dataCols.map(_.name)
     if (colNames.length != colNames.distinct.length) {
       val duplicateColumns = colNames.groupBy(identity).collect {
         case (x, ys) if ys.length > 1 => "\"" + x + "\""
@@ -952,7 +952,7 @@
 
     // Note: Hive requires partition columns to be distinct from the schema, so we need
     // to include the partition columns here explicitly
-    val schema = cols ++ partitionCols
+    val schema = StructType(dataCols ++ partitionCols)
 
     // Storage format
     val defaultStorage: CatalogStorageFormat = {
@@ -1296,23 +1296,6 @@
       isTemporary = isTemporary)
   }
 
-  /**
-   * Create a sequence of [[CatalogColumn]]s from a column list
-   */
-  private def visitCatalogColumns(ctx: ColTypeListContext): Seq[CatalogColumn] = withOrigin(ctx) {
-    ctx.colType.asScala.map { col =>
-      CatalogColumn(
-        col.identifier.getText.toLowerCase,
-        // Note: for types like "STRUCT<myFirstName: STRING, myLastName: STRING>" we can't
-        // just convert the whole type string to lower case, otherwise the struct field names
-        // will no longer be case sensitive. Instead, we rely on our parser to get the proper
-        // case before passing it to Hive.
-        typedVisit[DataType](col.dataType).catalogString,
-        nullable = true,
-        Option(col.STRING).map(string))
-    }
-  }
-
   /**
   * Create a [[ScriptInputOutputSchema]].
   */
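For a Hive-style CREATE TABLE, the builder now parses both column lists with visitColTypeList (yielding StructFields) and folds them into one StructType, appending the partition columns after the data columns. A rough sketch of the resulting shape for a hypothetical statement, with the names and types invented:

    import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

    // CREATE TABLE logs (id BIGINT, msg STRING) PARTITIONED BY (day STRING)
    val dataCols = Seq(StructField("id", LongType), StructField("msg", StringType))
    val partitionCols = Seq(StructField("day", StringType))

    // Hive keeps partition columns out of the data column list, so they are appended explicitly.
    val schema = StructType(dataCols ++ partitionCols)   // fields: id, msg, day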

sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala (2 additions, 4 deletions)

@@ -395,7 +395,7 @@ object CreateDataSourceTableUtils extends Logging {
     CatalogTable(
       identifier = tableIdent,
       tableType = tableType,
-      schema = Nil,
+      schema = new StructType,
       storage = CatalogStorageFormat(
         locationUri = None,
         inputFormat = None,
@@ -424,9 +424,7 @@
         compressed = false,
         properties = options
       ),
-      schema = relation.schema.map { f =>
-        CatalogColumn(f.name, f.dataType.catalogString)
-      },
+      schema = relation.schema,
       properties = tableProperties.toMap,
       viewText = None)
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala (1 addition, 1 deletion)

@@ -518,7 +518,7 @@ object DDLUtils {
   }
 
   def isTablePartitioned(table: CatalogTable): Boolean = {
-    table.partitionColumns.nonEmpty || table.properties.contains(DATASOURCE_SCHEMA_NUMPARTCOLS)
+    table.partitionColumnNames.nonEmpty || table.properties.contains(DATASOURCE_SCHEMA_NUMPARTCOLS)
   }
 
   // A persisted data source table always store its schema in the catalog.

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala (9 additions, 15 deletions)

@@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path
 
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogColumn, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
@@ -439,10 +439,10 @@ case class DescribeTableCommand(
         describeSchema(StructType(partColNames.map(userSpecifiedSchema(_))), buffer)
       }
     } else {
-      if (table.partitionColumns.nonEmpty) {
+      if (table.partitionColumnNames.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
-        describeSchema(table.partitionColumns, buffer)
+        describeSchema(table.partitionSchema, buffer)
       }
     }
   }
@@ -521,12 +521,6 @@ case class DescribeTableCommand(
     }
   }
 
-  private def describeSchema(schema: Seq[CatalogColumn], buffer: ArrayBuffer[Row]): Unit = {
-    schema.foreach { column =>
-      append(buffer, column.name, column.dataType.toLowerCase, column.comment.orNull)
-    }
-  }
-
   private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
     schema.foreach { column =>
       append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull)
@@ -701,7 +695,7 @@ case class ShowPartitionsCommand(
      * thrown if the partitioning spec is invalid.
      */
     if (spec.isDefined) {
-      val badColumns = spec.get.keySet.filterNot(tab.partitionColumns.map(_.name).contains)
+      val badColumns = spec.get.keySet.filterNot(tab.partitionColumnNames.contains)
       if (badColumns.nonEmpty) {
         val badCols = badColumns.mkString("[", ", ", "]")
         throw new AnalysisException(
@@ -799,14 +793,14 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand {
       .foreach(builder.append)
   }
 
-  private def columnToDDLFragment(column: CatalogColumn): String = {
-    val comment = column.comment.map(escapeSingleQuotedString).map(" COMMENT '" + _ + "'")
-    s"${quoteIdentifier(column.name)} ${column.dataType}${comment.getOrElse("")}"
+  private def columnToDDLFragment(column: StructField): String = {
+    val comment = column.getComment().map(escapeSingleQuotedString).map(" COMMENT '" + _ + "'")
+    s"${quoteIdentifier(column.name)} ${column.dataType.catalogString}${comment.getOrElse("")}"
   }
 
   private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = {
-    if (metadata.partitionColumns.nonEmpty) {
-      val partCols = metadata.partitionColumns.map(columnToDDLFragment)
+    if (metadata.partitionColumnNames.nonEmpty) {
+      val partCols = metadata.partitionSchema.map(columnToDDLFragment)
       builder ++= partCols.mkString("PARTITIONED BY (", ", ", ")\n")
     }
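columnToDDLFragment now takes a StructField: the column comment comes from the field's metadata via getComment(), and the type is rendered with catalogString, the Hive-compatible type text. A small illustration with an invented column:

    import org.apache.spark.sql.types.{ArrayType, StringType, StructField}

    val col = StructField("tags", ArrayType(StringType)).withComment("free-form labels")

    col.dataType.catalogString   // "array<string>" - the type text used in the generated DDL
    col.getComment()             // Some("free-form labels") - read back from the field metadata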

sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala (14 additions, 17 deletions)

@@ -21,10 +21,11 @@ import scala.util.control.NonFatal
 
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.{SQLBuilder, TableIdentifier}
-import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
+import org.apache.spark.sql.types.StructType
 
 
 /**
@@ -161,18 +162,17 @@
    * SQL based on the analyzed plan, and also creates the proper schema for the view.
    */
   private def prepareTable(sparkSession: SparkSession, analyzedPlan: LogicalPlan): CatalogTable = {
-    val viewSQL: String = {
-      val logicalPlan = if (userSpecifiedColumns.isEmpty) {
-        analyzedPlan
-      } else {
-        val projectList = analyzedPlan.output.zip(userSpecifiedColumns).map {
-          case (attr, (colName, _)) => Alias(attr, colName)()
-        }
-        sparkSession.sessionState.executePlan(Project(projectList, analyzedPlan)).analyzed
+    val aliasedPlan = if (userSpecifiedColumns.isEmpty) {
+      analyzedPlan
+    } else {
+      val projectList = analyzedPlan.output.zip(userSpecifiedColumns).map {
+        case (attr, (colName, _)) => Alias(attr, colName)()
       }
-      new SQLBuilder(logicalPlan).toSQL
+      sparkSession.sessionState.executePlan(Project(projectList, analyzedPlan)).analyzed
     }
 
+    val viewSQL: String = new SQLBuilder(aliasedPlan).toSQL
+
     // Validate the view SQL - make sure we can parse it and analyze it.
     // If we cannot analyze the generated query, there is probably a bug in SQL generation.
     try {
@@ -184,14 +184,11 @@
     }
 
     val viewSchema = if (userSpecifiedColumns.isEmpty) {
-      analyzedPlan.output.map { a =>
-        CatalogColumn(a.name, a.dataType.catalogString)
-      }
+      aliasedPlan.schema
     } else {
-      analyzedPlan.output.zip(userSpecifiedColumns).map {
-        case (a, (name, comment)) =>
-          CatalogColumn(name, a.dataType.catalogString, comment = comment)
-      }
+      StructType(aliasedPlan.schema.zip(userSpecifiedColumns).map {
+        case (field, (_, comment)) => comment.map(field.withComment).getOrElse(field)
+      })
     }
 
     CatalogTable(
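The view schema is now the aliased plan's StructType, with any user-supplied column comments folded in through withComment. A minimal sketch of that fold over a made-up two-column schema:

    import org.apache.spark.sql.types.{StringType, StructType}

    val planSchema = new StructType().add("c1", StringType).add("c2", StringType)
    // (name, optional comment) pairs as they come from the CREATE VIEW column list
    val userSpecifiedColumns = Seq("c1" -> Some("first column"), "c2" -> None)

    // Keep each field as-is unless the user supplied a comment for it.
    val viewSchema = StructType(planSchema.zip(userSpecifiedColumns).map {
      case (field, (_, comment)) => comment.map(field.withComment).getOrElse(field)
    })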

sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala (2 additions, 2 deletions)

@@ -157,8 +157,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     val columns = tableMetadata.schema.map { c =>
       new Column(
         name = c.name,
-        description = c.comment.orNull,
-        dataType = c.dataType,
+        description = c.getComment().orNull,
+        dataType = c.dataType.catalogString,
         nullable = c.nullable,
         isPartition = partitionColumnNames.contains(c.name),
         isBucket = bucketColumnNames.contains(c.name))
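With the schema stored as a StructType, Catalog.listColumns now reports each column's type as its catalogString (for example array<string>) and takes the description from the field's comment metadata. Assuming an active SparkSession named spark and an existing table named t, both hypothetical here:

    // Columns of table "t": name, description, dataType, nullable, isPartition, isBucket
    spark.catalog.listColumns("t").show()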
