1- #include < IO/ReadHelpers.h>
2- #include < cstddef>
3- #include < iterator>
4- #include < memory>
51#include < string>
6- #include < tuple>
72#include < vector>
8- #include < type_traits>
9- #include < unordered_map>
103#include < Processors/Formats/Impl/NpyRowInputFormat.h>
11- #include < Formats/FormatFactory.h>
12- #include < Formats/EscapingRuleUtils.h>
13- #include < DataTypes/Serializations/SerializationNullable.h>
144#include < DataTypes/DataTypeString.h>
155#include < Common/assert_cast.h>
16- #include < Common/typeid_cast.h>
176#include < Common/Exception.h>
18- #include " Formats/NumpyDataTypes.h"
7+ #include < DataTypes/DataTypeArray.h>
8+ #include < DataTypes/DataTypesNumber.h>
9+ #include < Formats/FormatFactory.h>
10+ #include < Formats/NumpyDataTypes.h>
1911#include < Columns/ColumnFixedString.h>
20- #include < Core/TypeId.h>
21- #include < Core/Types_fwd.h>
2212#include < Columns/ColumnString.h>
2313#include < Columns/ColumnArray.h>
2414#include < Columns/ColumnsNumber.h>
25- #include < Storages/IStorage.h>
26- #include < Columns/IColumn.h>
27- #include < Core/Field.h>
28- #include < Core/NamesAndTypes.h>
29- #include < DataTypes/DataTypeArray.h>
30- #include < DataTypes/DataTypesNumber.h>
3115#include < DataTypes/IDataType.h>
32- #include < DataTypes/Serializations/ISerialization.h>
3316#include < IO/ReadBuffer.h>
34- #include < IO/WriteHelpers.h>
3517#include < Processors/Formats/IRowInputFormat.h>
36- #include < base/types.h>
3718#include < boost/algorithm/string/split.hpp>
3819#include < IO/ReadBufferFromString.h>
3920
@@ -99,15 +80,11 @@ DataTypePtr createNestedArrayType(const DataTypePtr & nested_type, size_t depth)
9980
// Parses the numeric size component of a type descriptor string into a size_t.
// Throws Exception with ErrorCodes::INCORRECT_DATA when the text cannot be read as an integer.
// Diff hunk: '-' lines are the previous implementation, '+' lines are the replacement.
10081size_t parseTypeSize (const std::string & size_str)
10182{
// Previous version: std::stoi inside a catch-all handler; its int result was converted to size_t.
102- try
103- {
104- size_t size = std::stoi (size_str);
105- return size;
106- }
107- catch (...)
108- {
// New version: read the integer through a ReadBufferFromString with tryReadIntText,
// so failure is reported by a return value instead of a swallowed exception.
// NOTE(review): nothing visible here checks that all of size_str was consumed —
// confirm whether trailing characters after the digits should be rejected.
83+ ReadBufferFromString buf (size_str);
84+ size_t size;
85+ if (!tryReadIntText (size, buf))
10986 throw Exception (ErrorCodes::INCORRECT_DATA, " Invalid data type size: {}" , size_str);
110- }
87+ return size;
11188}
11289
11390std::shared_ptr<NumpyDataType> parseType (String type)
@@ -155,17 +132,14 @@ std::vector<int> parseShape(String shape_string)
155132 if (result_str[result_str.size ()-1 ].empty ())
156133 result_str.pop_back ();
157134 shape.reserve (result_str.size ());
158- bool is_first_elem = true ;
159135 for (const String & item : result_str)
160136 {
161137 int value;
162138 ReadBufferFromString buf (item);
163- if (!is_first_elem)
164- assertString (" " , buf);
139+ skipWhitespaceIfAny (buf);
165140 if (!tryReadIntText (value, buf))
166141 throw Exception (ErrorCodes::INCORRECT_DATA, " Invalid shape format: {}" , shape_string);
167142 shape.push_back (value);
168- is_first_elem = false ;
169143 }
170144 return shape;
171145}
@@ -316,8 +290,8 @@ void NpyRowInputFormat::readAndInsertInteger(IColumn * column, const DataTypePtr
316290 case NumpyDataTypeIndex::UInt32: readBinaryValueAndInsert<T, UInt32>(column->getPtr (), npy_type.getEndianness ()); break ;
317291 case NumpyDataTypeIndex::UInt64: readBinaryValueAndInsert<T, UInt64>(column->getPtr (), npy_type.getEndianness ()); break ;
318292 default :
319- throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert data type into column with type {}" ,
320- data_type->getName ());
293+ throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert Numpy value with type {} into column with type {}" ,
294+ magic_enum::enum_name (npy_type. getTypeIndex ()), data_type->getName ());
321295 }
322296}
323297
@@ -329,8 +303,8 @@ void NpyRowInputFormat::readAndInsertFloat(IColumn * column, const DataTypePtr &
329303 case NumpyDataTypeIndex::Float32: readBinaryValueAndInsert<T, Float32>(column->getPtr (), npy_type.getEndianness ()); break ;
330304 case NumpyDataTypeIndex::Float64: readBinaryValueAndInsert<T, Float64>(column->getPtr (), npy_type.getEndianness ()); break ;
331305 default :
332- throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert data type into column with type {}" ,
333- data_type->getName ());
306+ throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert Numpy value with type {} into column with type {}" ,
307+ magic_enum::enum_name (npy_type. getTypeIndex ()), data_type->getName ());
334308 }
335309}
336310
@@ -343,23 +317,19 @@ void NpyRowInputFormat::readAndInsertString(MutableColumnPtr column, const DataT
343317 else if (npy_type.getTypeIndex () == NumpyDataTypeIndex::Unicode)
344318 size = assert_cast<const NumpyDataTypeUnicode &>(npy_type).getSize ();
345319 else
346- throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert data type into column with type {}" ,
347- data_type->getName ());
320+ throw Exception (ErrorCodes::ILLEGAL_COLUMN, " Cannot insert Numpy value with type {} into column with type {}" ,
321+ magic_enum::enum_name (npy_type. getTypeIndex ()), data_type->getName ());
348322
349323 if (is_fixed)
350324 {
351325 auto & fixed_string_column = assert_cast<ColumnFixedString &>(*column);
352326 size_t n = fixed_string_column.getN ();
353327 if (size > n)
354328 throw Exception (ErrorCodes::TOO_LARGE_STRING_SIZE, " Too large string for FixedString column" );
355- fixed_string_column.getChars ().resize_fill (fixed_string_column.getChars ().size () + n);
356-
357- String tmp;
358- tmp.resize (size);
359-
360- in->readStrict (tmp.data (), size);
361- tmp.erase (std::remove (tmp.begin (), tmp.end (), ' \0 ' ), tmp.end ());
362- fixed_string_column.insertData (tmp.c_str (), tmp.size ());
329+ auto & chars = fixed_string_column.getChars ();
330+ size_t prev_size = chars.size ();
331+ chars.resize_fill (prev_size + n);
332+ in->readStrict (reinterpret_cast <char *>(chars.data () + prev_size), size);
363333 }
364334 else
365335 {
@@ -420,11 +390,6 @@ bool NpyRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & /*
420390 return true ;
421391}
422392
// Removed by this change (all lines are '-'): this member function did nothing
// except call IRowInputFormat::resetParser().
423- void NpyRowInputFormat::resetParser ()
424- {
425- IRowInputFormat::resetParser ();
426- }
427-
// Constructs the schema reader by forwarding the input buffer to the ISchemaReader base;
// the constructor body itself is empty.
428393NpySchemaReader::NpySchemaReader (ReadBuffer & in_)
429394 : ISchemaReader(in_) {}
430395
0 commit comments