
Commit 37e0a0a

Cleaning up
1 parent 14c3fd8 commit 37e0a0a

4 files changed: +7 -20 lines changed

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala

Lines changed: 3 additions & 2 deletions
```diff
@@ -106,8 +106,9 @@ private[sql] object CatalystConverter {
     }
   }

-  protected[parquet] def createRootConverter(parquetSchema: MessageType, attributes: Seq[Attribute]): CatalystConverter = {
-    //val attributes = ParquetTypesConverter.convertToAttributes(parquetSchema)
+  protected[parquet] def createRootConverter(
+      parquetSchema: MessageType,
+      attributes: Seq[Attribute]): CatalystConverter = {
     // For non-nested types we use the optimized Row converter
     if (attributes.forall(a => ParquetTypesConverter.isPrimitiveType(a.dataType))) {
       new CatalystPrimitiveRowConverter(attributes)
```
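
The signature split above is purely stylistic, but the body it wraps encodes the key decision in this file: use the optimized `CatalystPrimitiveRowConverter` only when every attribute is primitive. A minimal, self-contained sketch of that dispatch, using invented stand-in types rather than Spark's actual `Attribute`, `DataType`, and converter classes:

```scala
// Stand-ins for illustration only; the real types are Spark's Attribute,
// DataType, and the converters defined in ParquetConverter.scala.
object ConverterDispatchSketch {
  sealed trait DataType
  case object IntType extends DataType
  case object StringType extends DataType
  case class StructType(fields: Seq[DataType]) extends DataType

  case class Attribute(name: String, dataType: DataType)

  def isPrimitiveType(t: DataType): Boolean = t match {
    case StructType(_) => false // nested record
    case _             => true
  }

  trait CatalystConverter
  class PrimitiveRowConverter(attrs: Seq[Attribute]) extends CatalystConverter
  class NestedGroupConverter(attrs: Seq[Attribute]) extends CatalystConverter

  // Mirrors the branch in createRootConverter: take the fast primitive-row
  // path only when no attribute has a nested type.
  def createRootConverter(attributes: Seq[Attribute]): CatalystConverter =
    if (attributes.forall(a => isPrimitiveType(a.dataType))) {
      new PrimitiveRowConverter(attributes)
    } else {
      new NestedGroupConverter(attributes)
    }
}
```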

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala

Lines changed: 0 additions & 5 deletions
```diff
@@ -66,13 +66,9 @@ case class ParquetTableScan(
     }

     // Store Parquet schema in `Configuration`
-    // TODO: should this here be just the projected fields?
     conf.set(
       RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA,
       ParquetTypesConverter.convertToString(output))
-    //conf.set(
-    //  RowReadSupport.PARQUET_ROW_REQUESTED_SCHEMA,
-    //  ParquetTypesConverter.convertFromAttributes(output).toString)

     // Store record filtering predicate in `Configuration`
     // Note 1: the input format ignores all predicates that cannot be expressed
@@ -181,7 +177,6 @@ case class InsertIntoParquetTable(

     ParquetOutputFormat.setWriteSupportClass(job, writeSupport)

-    // TODO: move that to function in object
     val conf = ContextUtil.getConfiguration(job)
     //conf.set(RowWriteSupport.PARQUET_ROW_SCHEMA, StructType.fromAttributes(relation.output).toString)
     RowWriteSupport.setSchema(relation.output, conf)
```
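
Both hunks in this file lean on the same mechanism: schema information is serialized to a string and stashed in the Hadoop `Configuration`, where `RowReadSupport`/`RowWriteSupport` can recover it when records are actually read or written. A minimal sketch of that round-trip, assuming only the standard `Configuration` API; the key name and the comma-separated encoding are invented for illustration:

```scala
import org.apache.hadoop.conf.Configuration

object SchemaHandoffSketch extends App {
  // Hypothetical key; the actual key in this commit is
  // RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA.
  val RequestedSchemaKey = "example.parquet.requested.schema"

  // Driver side: serialize the requested columns into the job configuration.
  def storeSchema(conf: Configuration, columns: Seq[String]): Unit =
    conf.set(RequestedSchemaKey, columns.mkString(","))

  // Read-support side: recover the columns, if any were requested.
  def loadSchema(conf: Configuration): Option[Seq[String]] =
    Option(conf.get(RequestedSchemaKey)).map(_.split(",").toSeq)

  val conf = new Configuration()
  storeSchema(conf, Seq("id", "name"))
  assert(loadSchema(conf) == Some(Seq("id", "name")))
}
```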

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala

Lines changed: 1 addition & 12 deletions
```diff
@@ -59,7 +59,7 @@ private[parquet] class RowReadSupport extends ReadSupport[Row] with Logging {
       fileSchema: MessageType,
       readContext: ReadContext): RecordMaterializer[Row] = {
     log.debug(s"preparing for read with file schema $fileSchema")
-    //new RowRecordMaterializer(readContext.getRequestedSchema)
+    // Note: this very much imitates AvroParquet
     val parquetSchema = readContext.getRequestedSchema
     var schema: Seq[Attribute] =
       if (readContext.getReadSupportMetadata != null &&
@@ -77,17 +77,6 @@ private[parquet] class RowReadSupport extends ReadSupport[Row] with Logging {
       configuration: Configuration,
       keyValueMetaData: java.util.Map[String, String],
       fileSchema: MessageType): ReadContext = {
-    /*val requested_schema_string =
-      configuration.get(RowReadSupport.PARQUET_ROW_REQUESTED_SCHEMA, fileSchema.toString)
-    val requested_schema =
-      MessageTypeParser.parseMessageType(requested_schema_string)
-    log.debug(s"read support initialized for requested schema $requested_schema")
-    ParquetRelation.enableLogForwarding()
-    new ReadContext(requested_schema, keyValueMetaData) */
-
-    // GO ON HERE.. figure out why Avro distinguishes between requested read and read schema
-    // try to figure out what when needs to be written to metadata
-
     var parquetSchema: MessageType = fileSchema
     var metadata: java.util.Map[String, String] = null
     val requestedAttributes = RowReadSupport.getRequestedSchema(configuration)
```
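
The two methods touched here implement parquet-mr's two-phase `ReadSupport` contract: `init` runs when a read is set up and returns a `ReadContext` carrying the requested schema plus key/value metadata, and `prepareForRead` then builds the record materializer from that context. A simplified sketch of the same shape, with plain stand-in types instead of the Parquet API:

```scala
object ReadSupportSketch {
  // Stand-in for Parquet's ReadContext: the schema chosen in init() plus
  // metadata that prepareForRead() can consult later.
  case class ReadContext(requestedSchema: String, metadata: Map[String, String])

  trait SimpleReadSupport[T] {
    // Phase 1, run once per read: decide what to read.
    def init(conf: Map[String, String], fileSchema: String): ReadContext
    // Phase 2: build the record materializer from that context.
    def prepareForRead(ctx: ReadContext): Iterator[String] => Iterator[T]
  }

  // Toy implementation over comma-separated lines instead of Parquet pages.
  class CsvReadSupport extends SimpleReadSupport[Seq[String]] {
    override def init(conf: Map[String, String], fileSchema: String): ReadContext = {
      // Same fallback the code above suggests: prefer a schema requested
      // through the configuration, else read the full file schema.
      val requested = conf.getOrElse("requested.schema", fileSchema)
      ReadContext(requested, Map("example.schema" -> requested))
    }

    override def prepareForRead(
        ctx: ReadContext): Iterator[String] => Iterator[Seq[String]] =
      records => records.map(_.split(",").toSeq)
  }
}
```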

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala

Lines changed: 3 additions & 1 deletion
```diff
@@ -325,7 +325,9 @@ private[parquet] object ParquetTypesConverter {
     }
     val extraMetadata = new java.util.HashMap[String, String]()
     extraMetadata.put("path", path.toString)
-    extraMetadata.put(RowReadSupport.SPARK_METADATA_KEY, ParquetTypesConverter.convertToString(attributes))
+    extraMetadata.put(
+      RowReadSupport.SPARK_METADATA_KEY,
+      ParquetTypesConverter.convertToString(attributes))
     // TODO: add extra data, e.g., table name, date, etc.?

     val parquetSchema: MessageType =
```
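
The value stored under `SPARK_METADATA_KEY` is the attribute list serialized to a string, so a later reader can parse the original Catalyst schema back out of the file footer. A toy round-trip showing the idea; the `name:type` encoding is a made-up stand-in, not Spark's actual format:

```scala
object SchemaStringSketch {
  case class Attribute(name: String, dataType: String)

  // Serialize, e.g. Seq(Attribute("id", "int")) -> "id:int".
  def convertToString(attributes: Seq[Attribute]): String =
    attributes.map(a => s"${a.name}:${a.dataType}").mkString(";")

  // Parse the string back into attributes.
  def convertFromString(s: String): Seq[Attribute] =
    s.split(";").toSeq.map { field =>
      val Array(name, tpe) = field.split(":")
      Attribute(name, tpe)
    }

  def main(args: Array[String]): Unit = {
    val attrs = Seq(Attribute("id", "int"), Attribute("name", "string"))
    assert(convertFromString(convertToString(attrs)) == attrs)
  }
}
```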
