Skip to content

Commit 9233997

Browse files
Merge pull request ClickHouse#11233 from nikitamikhaylov/cache-getstring-bugfix
Cache-dictionary getString() bugfix
2 parents cf08299 + 4f6b124 commit 9233997

File tree

3 files changed

+118
-26
lines changed

3 files changed

+118
-26
lines changed

src/Dictionaries/CacheDictionary.inc.h

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -302,37 +302,33 @@ void CacheDictionary::getItemsString(
302302

303303
/// Request new values sync.
304304
/// We have request both cache_not_found_ids and cache_expired_ids.
305-
if (!cache_not_found_ids.empty())
306-
{
307-
std::vector<Key> required_ids;
308-
required_ids.reserve(cache_not_found_ids.size() + cache_expired_ids.size());
309-
std::transform(
310-
std::begin(cache_not_found_ids), std::end(cache_not_found_ids),
311-
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
312-
std::transform(
313-
std::begin(cache_expired_ids), std::end(cache_expired_ids),
314-
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
315-
316-
auto on_cell_updated = [&] (const auto id, const auto cell_idx)
317-
{
318-
const auto attribute_value = attribute_array[cell_idx];
305+
std::vector<Key> required_ids;
306+
required_ids.reserve(cache_not_found_ids.size() + cache_expired_ids.size());
307+
std::transform(
308+
std::begin(cache_not_found_ids), std::end(cache_not_found_ids),
309+
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
310+
std::transform(
311+
std::begin(cache_expired_ids), std::end(cache_expired_ids),
312+
std::back_inserter(required_ids), [](auto & pair) { return pair.first; });
319313

320-
map[id] = String{attribute_value};
321-
total_length += (attribute_value.size + 1) * cache_not_found_ids[id].size();
322-
};
314+
auto on_cell_updated = [&] (const auto id, const auto cell_idx)
315+
{
316+
const auto attribute_value = attribute_array[cell_idx];
323317

324-
auto on_id_not_found = [&] (const auto id, const auto)
325-
{
326-
for (const auto row : cache_not_found_ids[id])
327-
total_length += get_default(row).size + 1;
328-
};
318+
map[id] = String{attribute_value};
319+
total_length += (attribute_value.size + 1) * cache_not_found_ids[id].size();
320+
};
329321

330-
auto update_unit_ptr = std::make_shared<UpdateUnit>(required_ids, on_cell_updated, on_id_not_found);
322+
auto on_id_not_found = [&] (const auto id, const auto)
323+
{
324+
for (const auto row : cache_not_found_ids[id])
325+
total_length += get_default(row).size + 1;
326+
};
331327

332-
tryPushToUpdateQueueOrThrow(update_unit_ptr);
333-
waitForCurrentUpdateFinish(update_unit_ptr);
334-
}
328+
auto update_unit_ptr = std::make_shared<UpdateUnit>(required_ids, on_cell_updated, on_id_not_found);
335329

330+
tryPushToUpdateQueueOrThrow(update_unit_ptr);
331+
waitForCurrentUpdateFinish(update_unit_ptr);
336332
out->getChars().reserve(total_length);
337333

338334
for (const auto row : ext::range(0, ext::size(ids)))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<yandex>
2+
<dictionary>
3+
<name>default_string</name>
4+
<source>
5+
<clickhouse>
6+
<host>dictionary_node</host>
7+
<port>9000</port>
8+
<user>default</user>
9+
<password></password>
10+
<db>test</db>
11+
<table>strings</table>
12+
</clickhouse>
13+
</source>
14+
<lifetime>
15+
<max>2</max>
16+
<min>1</min>
17+
</lifetime>
18+
<layout>
19+
<cache>
20+
<size_in_cells>1000</size_in_cells>
21+
<max_update_queue_size>10000</max_update_queue_size>
22+
</cache>
23+
</layout>
24+
<structure>
25+
<id>
26+
<name>key</name>
27+
</id>
28+
<attribute>
29+
<name>value</name>
30+
<type>String</type>
31+
<null_value></null_value>>
32+
</attribute>
33+
</structure>
34+
</dictionary>
35+
</yandex>
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from __future__ import print_function
2+
import pytest
3+
import os
4+
import random
5+
import string
6+
import time
7+
8+
from helpers.cluster import ClickHouseCluster
9+
from helpers.test_tools import TSV
10+
11+
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
12+
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
13+
14+
dictionary_node = cluster.add_instance('dictionary_node', stay_alive=True)
15+
main_node = cluster.add_instance('main_node', main_configs=['configs/dictionaries/cache_strings_default_settings.xml'])
16+
17+
18+
def get_random_string(string_length=8):
19+
alphabet = string.ascii_letters + string.digits
20+
return ''.join((random.choice(alphabet) for _ in range(string_length)))
21+
22+
@pytest.fixture(scope="module")
23+
def started_cluster():
24+
try:
25+
cluster.start()
26+
dictionary_node.query("CREATE DATABASE IF NOT EXISTS test;")
27+
dictionary_node.query("DROP TABLE IF EXISTS test.strings;")
28+
dictionary_node.query("""
29+
CREATE TABLE test.strings
30+
(key UInt64, value String)
31+
ENGINE = Memory;
32+
""")
33+
34+
values_to_insert = ", ".join(["({}, '{}')".format(1000000 + number, get_random_string()) for number in range(100)])
35+
dictionary_node.query("INSERT INTO test.strings VALUES {}".format(values_to_insert))
36+
37+
yield cluster
38+
finally:
39+
cluster.shutdown()
40+
41+
# @pytest.mark.skip(reason="debugging")
42+
def test_return_real_values(started_cluster):
43+
assert None != dictionary_node.get_process_pid("clickhouse"), "ClickHouse must be alive"
44+
45+
first_batch = """
46+
SELECT count(*)
47+
FROM
48+
(
49+
SELECT
50+
arrayJoin(arrayMap(x -> (x + 1000000), range(100))) AS id,
51+
dictGetString('default_string', 'value', toUInt64(id)) AS value
52+
)
53+
WHERE value = '';
54+
"""
55+
56+
assert TSV("0") == TSV(main_node.query(first_batch))
57+
58+
# Waiting for cache to become expired
59+
time.sleep(5)
60+
61+
assert TSV("0") == TSV(main_node.query(first_batch))

0 commit comments

Comments
 (0)