
Commit 37e0a0a

Cleaning up
1 parent 14c3fd8 commit 37e0a0a

4 files changed: +7 -20 lines changed

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala

Lines changed: 3 additions & 2 deletions
```diff
@@ -106,8 +106,9 @@ private[sql] object CatalystConverter {
     }
   }

-  protected[parquet] def createRootConverter(parquetSchema: MessageType, attributes: Seq[Attribute]): CatalystConverter = {
-    //val attributes = ParquetTypesConverter.convertToAttributes(parquetSchema)
+  protected[parquet] def createRootConverter(
+      parquetSchema: MessageType,
+      attributes: Seq[Attribute]): CatalystConverter = {
     // For non-nested types we use the optimized Row converter
     if (attributes.forall(a => ParquetTypesConverter.isPrimitiveType(a.dataType))) {
       new CatalystPrimitiveRowConverter(attributes)
```
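
The signature split above is purely stylistic, but the body it wraps encodes the key decision in this file: use the optimized `CatalystPrimitiveRowConverter` only when every attribute is primitive. A minimal, self-contained sketch of that dispatch, using invented stand-in types rather than Spark's actual `Attribute`, `DataType`, and converter classes:

```scala
// Stand-ins for illustration only; the real types are Spark's Attribute,
// DataType, and the converters defined in ParquetConverter.scala.
object ConverterDispatchSketch {
  sealed trait DataType
  case object IntType extends DataType
  case object StringType extends DataType
  case class StructType(fields: Seq[DataType]) extends DataType

  case class Attribute(name: String, dataType: DataType)

  def isPrimitiveType(t: DataType): Boolean = t match {
    case StructType(_) => false // nested record
    case _             => true
  }

  trait CatalystConverter
  class PrimitiveRowConverter(attrs: Seq[Attribute]) extends CatalystConverter
  class NestedGroupConverter(attrs: Seq[Attribute]) extends CatalystConverter

  // Mirrors the branch in createRootConverter: take the fast primitive-row
  // path only when no attribute has a nested type.
  def createRootConverter(attributes: Seq[Attribute]): CatalystConverter =
    if (attributes.forall(a => isPrimitiveType(a.dataType))) {
      new PrimitiveRowConverter(attributes)
    } else {
      new NestedGroupConverter(attributes)
    }
}
```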

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala

Lines changed: 0 additions & 5 deletions
```diff
@@ -66,13 +66,9 @@ case class ParquetTableScan(
     }

     // Store Parquet schema in `Configuration`
-    // TODO: should this here be just the projected fields?
     conf.set(
       RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA,
       ParquetTypesConverter.convertToString(output))
-    //conf.set(
-    //  RowReadSupport.PARQUET_ROW_REQUESTED_SCHEMA,
-    //  ParquetTypesConverter.convertFromAttributes(output).toString)

     // Store record filtering predicate in `Configuration`
     // Note 1: the input format ignores all predicates that cannot be expressed
@@ -181,7 +177,6 @@ case class InsertIntoParquetTable(

     ParquetOutputFormat.setWriteSupportClass(job, writeSupport)

-    // TODO: move that to function in object
     val conf = ContextUtil.getConfiguration(job)
     //conf.set(RowWriteSupport.PARQUET_ROW_SCHEMA, StructType.fromAttributes(relation.output).toString)
     RowWriteSupport.setSchema(relation.output, conf)
```
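
Both hunks in this file lean on the same mechanism: schema information is serialized to a string and stashed in the Hadoop `Configuration`, where `RowReadSupport`/`RowWriteSupport` can recover it when records are actually read or written. A minimal sketch of that round-trip, assuming only the standard `Configuration` API; the key name and the comma-separated encoding are invented for illustration:

```scala
import org.apache.hadoop.conf.Configuration

object SchemaHandoffSketch extends App {
  // Hypothetical key; the actual key in this commit is
  // RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA.
  val RequestedSchemaKey = "example.parquet.requested.schema"

  // Driver side: serialize the requested columns into the job configuration.
  def storeSchema(conf: Configuration, columns: Seq[String]): Unit =
    conf.set(RequestedSchemaKey, columns.mkString(","))

  // Read-support side: recover the columns, if any were requested.
  def loadSchema(conf: Configuration): Option[Seq[String]] =
    Option(conf.get(RequestedSchemaKey)).map(_.split(",").toSeq)

  val conf = new Configuration()
  storeSchema(conf, Seq("id", "name"))
  assert(loadSchema(conf) == Some(Seq("id", "name")))
}
```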

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala

Lines changed: 1 addition & 12 deletions
```diff
@@ -59,7 +59,7 @@ private[parquet] class RowReadSupport extends ReadSupport[Row] with Logging {
       fileSchema: MessageType,
       readContext: ReadContext): RecordMaterializer[Row] = {
     log.debug(s"preparing for read with file schema $fileSchema")
-    //new RowRecordMaterializer(readContext.getRequestedSchema)
+    // Note: this very much imitates AvroParquet
     val parquetSchema = readContext.getRequestedSchema
     var schema: Seq[Attribute] =
       if (readContext.getReadSupportMetadata != null &&
@@ -77,17 +77,6 @@ private[parquet] class RowReadSupport extends ReadSupport[Row] with Logging {
       configuration: Configuration,
       keyValueMetaData: java.util.Map[String, String],
       fileSchema: MessageType): ReadContext = {
-    /*val requested_schema_string =
-      configuration.get(RowReadSupport.PARQUET_ROW_REQUESTED_SCHEMA, fileSchema.toString)
-    val requested_schema =
-      MessageTypeParser.parseMessageType(requested_schema_string)
-    log.debug(s"read support initialized for requested schema $requested_schema")
-    ParquetRelation.enableLogForwarding()
-    new ReadContext(requested_schema, keyValueMetaData) */
-
-    // GO ON HERE.. figure out why Avro distinguishes between requested read and read schema
-    // try to figure out what when needs to be written to metadata
-
     var parquetSchema: MessageType = fileSchema
     var metadata: java.util.Map[String, String] = null
     val requestedAttributes = RowReadSupport.getRequestedSchema(configuration)
```
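
The two methods touched here implement parquet-mr's two-phase `ReadSupport` contract: `init` runs when a read is set up and returns a `ReadContext` carrying the requested schema plus key/value metadata, and `prepareForRead` then builds the record materializer from that context. A simplified sketch of the same shape, with plain stand-in types instead of the Parquet API:

```scala
object ReadSupportSketch {
  // Stand-in for Parquet's ReadContext: the schema chosen in init() plus
  // metadata that prepareForRead() can consult later.
  case class ReadContext(requestedSchema: String, metadata: Map[String, String])

  trait SimpleReadSupport[T] {
    // Phase 1, run once per read: decide what to read.
    def init(conf: Map[String, String], fileSchema: String): ReadContext
    // Phase 2: build the record materializer from that context.
    def prepareForRead(ctx: ReadContext): Iterator[String] => Iterator[T]
  }

  // Toy implementation over comma-separated lines instead of Parquet pages.
  class CsvReadSupport extends SimpleReadSupport[Seq[String]] {
    override def init(conf: Map[String, String], fileSchema: String): ReadContext = {
      // Same fallback the code above suggests: prefer a schema requested
      // through the configuration, else read the full file schema.
      val requested = conf.getOrElse("requested.schema", fileSchema)
      ReadContext(requested, Map("example.schema" -> requested))
    }

    override def prepareForRead(
        ctx: ReadContext): Iterator[String] => Iterator[Seq[String]] =
      records => records.map(_.split(",").toSeq)
  }
}
```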

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala

Lines changed: 3 additions & 1 deletion
```diff
@@ -325,7 +325,9 @@ private[parquet] object ParquetTypesConverter {
     }
     val extraMetadata = new java.util.HashMap[String, String]()
     extraMetadata.put("path", path.toString)
-    extraMetadata.put(RowReadSupport.SPARK_METADATA_KEY, ParquetTypesConverter.convertToString(attributes))
+    extraMetadata.put(
+      RowReadSupport.SPARK_METADATA_KEY,
+      ParquetTypesConverter.convertToString(attributes))
     // TODO: add extra data, e.g., table name, date, etc.?

     val parquetSchema: MessageType =
```
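
The value stored under `SPARK_METADATA_KEY` is the attribute list serialized to a string, so a later reader can parse the original Catalyst schema back out of the file footer. A toy round-trip showing the idea; the `name:type` encoding is a made-up stand-in, not Spark's actual format:

```scala
object SchemaStringSketch {
  case class Attribute(name: String, dataType: String)

  // Serialize, e.g. Seq(Attribute("id", "int")) -> "id:int".
  def convertToString(attributes: Seq[Attribute]): String =
    attributes.map(a => s"${a.name}:${a.dataType}").mkString(";")

  // Parse the string back into attributes.
  def convertFromString(s: String): Seq[Attribute] =
    s.split(";").toSeq.map { field =>
      val Array(name, tpe) = field.split(":")
      Attribute(name, tpe)
    }

  def main(args: Array[String]): Unit = {
    val attrs = Seq(Attribute("id", "int"), Attribute("name", "string"))
    assert(convertFromString(convertToString(attrs)) == attrs)
  }
}
```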
