Merged
22 commits
1748853
Parallelize query processing right after reading FROM ...
devcrafter Apr 12, 2023
ac54033
Let's see blast radius w/o parallelization after numbers()
devcrafter Apr 12, 2023
ba1adeb
Fix fast tests
devcrafter Apr 12, 2023
5ed85d9
Fix 00109_shard_totals_after_having.sql
devcrafter Apr 13, 2023
bb60f10
Fix 02231_buffer_aggregate_states_leak.sql
devcrafter Apr 13, 2023
fcd2eae
Disable 'Dictionary' storage due to count() can return incorrect result
devcrafter Apr 13, 2023
eacbd2b
Fix test_storage_mysql/test_settings_connection_wait_timeout
devcrafter Apr 13, 2023
39df2bd
Automatic style fix
robot-clickhouse Apr 13, 2023
a01efbd
Fix flaky check
devcrafter Apr 13, 2023
fa63460
Merge remote-tracking branch 'origin/master' into parallel-processing…
devcrafter Apr 13, 2023
780e4f9
Fix flaky check: 00109_shard_totals_after_having.sql
devcrafter Apr 14, 2023
7c84dc4
Better way to define for which storage output is parallelized
devcrafter Apr 14, 2023
4dfad9e
Try to fix flaky integration test
devcrafter Apr 14, 2023
9e92c26
Fix test_storage_mysql/test.py::test_settings_connection_wait_timeout
devcrafter Apr 14, 2023
60dbb7b
Merge remote-tracking branch 'origin/master' into parallel-processing…
devcrafter Apr 14, 2023
6c03b2e
Automatic style fix
robot-clickhouse Apr 14, 2023
8603807
Use generic way to parallelize output for file()
devcrafter Apr 15, 2023
cdd9aef
Merge remote-tracking branch 'origin/master' into parallel-processing…
devcrafter Apr 15, 2023
908ad29
Do not parallelize output for zeroes()
devcrafter Apr 15, 2023
2455334
Merge remote-tracking branch 'origin/master' into parallel-processing…
devcrafter Apr 18, 2023
d5eb65b
Remove redundant narrowPipe()
devcrafter Apr 18, 2023
8a92eb0
Update src/Storages/IStorage.h
alexey-milovidov Apr 21, 2023
7 changes: 7 additions & 0 deletions src/Storages/IStorage.cpp
@@ -133,6 +133,13 @@ void IStorage::read(
size_t num_streams)
{
auto pipe = read(column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams);

/// parallelize processing if not yet
Member
  1. Should we do it here, or is it better to do it inside InterpreterSelectQuery?
  2. Are there any potential troubles with mutations and StorageFromMergeTreeDataPart?

Member Author

  1. Should we do it here, or is it better to do it inside InterpreterSelectQuery?

I think it's OK to do it here, with the following considerations:

num_streams is provided by InterpreterSelectQuery as a recommendation, i.e. how many threads are available for data processing. The reading step then has the following choices:

  • (a) it knows the amount of data it can read, and it's not much data, so it creates only the necessary number of data streams based on the parameters passed to IStorage::read(), i.e. max_block_size/storage_limits. In this case, we don't want to adjust the number of streams, and parallelizeOutputAfterReading() can return false

  • (b) the amount of data is either unknown, or known but large enough to utilize all available threads -> in both cases the output is parallelized into num_streams streams

  2. Are there any potential troubles with mutations and StorageFromMergeTreeDataPart?

The generic part of this change affects only storages that use the default plan step to read from storage, ReadFromStorageStep. Sophisticated engines use specialized steps to read from their storage, like ReadFromMergeTree in the MergeTree case.

StorageFromMergeTreeDataPart is not affected since it uses the ReadFromMergeTree step, which overrides the read() method where this resize() is added.

const size_t output_ports = pipe.numOutputPorts();
const auto storage_name = getName();
if (parallelizeOutputAfterReading() && output_ports > 0 && output_ports < num_streams)
pipe.resize(num_streams);

readFromPipe(query_plan, std::move(pipe), column_names, storage_snapshot, query_info, context, getName());
}
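The review discussion above boils down to a small decision rule: widen the pipe only when the storage opts in, produced at least one stream, and produced fewer streams than the recommended num_streams. A minimal sketch of that rule in Python (hypothetical names modeled on the C++ hunk above, not ClickHouse code):

```python
# Hypothetical model of the decision added in IStorage::read():
# mirrors `parallelizeOutputAfterReading() && output_ports > 0
# && output_ports < num_streams -> pipe.resize(num_streams)`.
def streams_after_read(output_ports: int, num_streams: int,
                       parallelize_output_after_reading: bool) -> int:
    """Return the number of output streams after the optional resize."""
    if parallelize_output_after_reading and 0 < output_ports < num_streams:
        return num_streams  # pipe.resize(num_streams): fan out, case (b)
    return output_ports     # keep the storage's own stream count, case (a)

# One stream from file() with 8 worker threads available -> fan out.
assert streams_after_read(1, 8, True) == 8
# system.numbers opts out -> stays sequential.
assert streams_after_read(1, 8, False) == 1
# Empty glob produced no streams -> nothing to widen.
assert streams_after_read(0, 8, True) == 0
```

Note that the rule never narrows a pipe: a storage that already produced num_streams or more streams is left untouched.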

9 changes: 9 additions & 0 deletions src/Storages/IStorage.h
@@ -368,6 +368,15 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
size_t /*max_block_size*/,
size_t /*num_streams*/);

/// Should we process blocks of data returned by the storage in parallel
/// even when the storage returned only one stream of data for reading?
/// It is beneficial, for example, when you read from a file quickly,
/// but then do heavy computations on returned blocks.
/// This is enabled by default, but in some cases shouldn't be done.
/// For example, when you read from system.numbers instead of system.numbers_mt,
/// you still expect the data to be processed sequentially.
virtual bool parallelizeOutputAfterReading() const { return true; }

public:
/// Other version of read which adds reading step to query plan.
/// Default implementation creates ReadFromStorageStep and uses usual read.
4 changes: 4 additions & 0 deletions src/Storages/StorageDictionary.h
@@ -74,6 +74,10 @@ friend class TableFunctionDictionary;
size_t max_block_size,
size_t threads) override;

/// FIXME: processing after reading from dictionaries is not parallelized due to a bug:
/// count() can return a wrong result, see test_dictionaries_redis/test_long.py::test_redis_dict_long
bool parallelizeOutputAfterReading() const override { return false; }

std::shared_ptr<const IDictionary> getDictionary() const;

static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure);
10 changes: 1 addition & 9 deletions src/Storages/StorageFile.cpp
@@ -791,15 +791,7 @@ Pipe StorageFile::read(
std::move(read_buffer)));
}

Pipe pipe = Pipe::unitePipes(std::move(pipes));
/// Parallelize output as much as possible
/// Note: number of streams can be 0 if paths is empty
/// It happens if globs in file(path, ...) expands to empty set i.e. no files to process
if (num_streams > 0 && num_streams < max_num_streams)
{
pipe.resize(max_num_streams);
}
return pipe;
return Pipe::unitePipes(std::move(pipes));
}


2 changes: 2 additions & 0 deletions src/Storages/StorageNull.h
@@ -42,6 +42,8 @@ class StorageNull final : public IStorage
std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
}

bool parallelizeOutputAfterReading() const override { return false; }

bool supportsParallelInsert() const override { return true; }

SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr) override
6 changes: 1 addition & 5 deletions src/Storages/StorageS3.cpp
@@ -44,7 +44,6 @@
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/narrowPipe.h>

#include <QueryPipeline/QueryPipelineBuilder.h>

@@ -1076,10 +1075,7 @@ Pipe StorageS3::read(
max_download_threads));
}

auto pipe = Pipe::unitePipes(std::move(pipes));

narrowPipe(pipe, num_streams);
return pipe;
return Pipe::unitePipes(std::move(pipes));
}

SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context)
2 changes: 2 additions & 0 deletions src/Storages/System/StorageSystemNumbers.h
@@ -40,6 +40,8 @@ class StorageSystemNumbers final : public IStorage
size_t max_block_size,
size_t num_streams) override;

bool parallelizeOutputAfterReading() const override { return false; }

bool hasEvenlyDistributedRead() const override { return true; }
bool isSystemStorage() const override { return true; }
bool supportsTransactions() const override { return true; }
2 changes: 2 additions & 0 deletions src/Storages/System/StorageSystemOne.h
@@ -30,6 +30,8 @@ class StorageSystemOne final : public IStorage
size_t max_block_size,
size_t num_streams) override;

bool parallelizeOutputAfterReading() const override { return false; }

bool isSystemStorage() const override { return true; }

bool supportsTransactions() const override { return true; }
2 changes: 2 additions & 0 deletions src/Storages/System/StorageSystemZeros.h
@@ -31,6 +31,8 @@ class StorageSystemZeros final : public IStorage
size_t max_block_size,
size_t num_streams) override;

bool parallelizeOutputAfterReading() const override { return false; }

bool hasEvenlyDistributedRead() const override { return true; }
bool isSystemStorage() const override { return true; }
bool supportsTransactions() const override { return true; }
16 changes: 14 additions & 2 deletions tests/integration/test_storage_mysql/test.py
@@ -519,21 +519,33 @@ def test_settings_connection_wait_timeout(started_cluster):
)
)

worker_started_event = threading.Event()

def worker():
node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name))
worker_started_event.set()
node1.query(
"SELECT 1, sleepEachRow(1) FROM {} SETTINGS max_threads=1".format(
table_name
)
)

worker_thread = threading.Thread(target=worker)
worker_thread.start()

# ensure that first query started in worker_thread
assert worker_started_event.wait(10)
time.sleep(1)

started = time.time()
with pytest.raises(
QueryRuntimeException,
match=r"Exception: mysqlxx::Pool is full \(connection_wait_timeout is exceeded\)",
):
node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name))
node1.query(
"SELECT 2, sleepEachRow(1) FROM {} SETTINGS max_threads=1".format(
table_name
)
)
ended = time.time()
assert (ended - started) >= wait_timeout
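The fix above relies on two things: the worker signals via a threading.Event before it occupies the only pool connection, so the main thread waits for that signal instead of racing it, and max_threads=1 keeps the now-parallelized pipeline from opening extra MySQL connections. The event pattern in isolation looks like this (a generic sketch, not ClickHouse test code):

```python
import threading
import time

# The worker announces it has started before doing the long operation,
# and the main thread blocks on the event with a timeout instead of
# sleeping for an arbitrary amount of time.
started = threading.Event()
result = []

def worker():
    started.set()        # announce before the long-running operation
    time.sleep(0.2)      # stands in for the slow query holding the connection
    result.append("done")

t = threading.Thread(target=worker)
t.start()
assert started.wait(10)  # returns True as soon as the worker sets the event
t.join()
assert result == ["done"]
```

Event.wait() returns False only if the timeout expires first, which is why the test asserts on its return value rather than sleeping unconditionally.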

8 changes: 4 additions & 4 deletions tests/queries/0_stateless/00109_shard_totals_after_having.sql
@@ -10,15 +10,15 @@ CREATE TABLE numbers500k (number UInt32) ENGINE = TinyLog;
INSERT INTO numbers500k SELECT number FROM system.numbers LIMIT 500000;

SET totals_mode = 'after_having_auto';
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) ORDER BY number) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;

SET totals_mode = 'after_having_inclusive';
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) ORDER BY number) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;

SET totals_mode = 'after_having_exclusive';
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) ORDER BY number) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;

SET totals_mode = 'before_having';
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;
SELECT intDiv(number, 2) AS k, count(), argMax(toString(number), number) FROM (SELECT * FROM remote('127.0.0.{2,3}', currentDatabase(), numbers500k) ORDER BY number) GROUP BY k WITH TOTALS ORDER BY k LIMIT 10;

DROP TABLE numbers500k;
@@ -28,7 +28,7 @@ create materialized view mv_02231 to buffer_02231 as select
from in_02231
group by key;

insert into in_02231 select * from numbers(10e6) settings max_memory_usage='300Mi';
insert into in_02231 select * from numbers(10e6) settings max_memory_usage='310Mi', max_threads=1;

drop table buffer_02231;
drop table out_02231;
Expand Down