Skip to content

Commit 2d7d67e

Browse files
Remove special handling of "short" queries - the logic is too ad-hoc, it pollutes the code and is difficult to handle
1 parent bf100e0 commit 2d7d67e

File tree

4 files changed

+7
-96
lines changed

4 files changed

+7
-96
lines changed

docker/test/performance-comparison/README.md

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,6 @@ Action required for every item -- these are errors that must be fixed.
5050

5151
A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision.
5252

53-
Sometimes you want to test a query that is supposed to complete "instantaneously", i.e. in sublinear time. This might be `count(*)`, or parsing a complicated tuple. It might not be practical or even possible to increase the run time of such queries by adding more data. For such queries there is a special comparison mode which runs them for a fixed amount of time, instead of a fixed number of iterations like we do normally. This mode is inferior to the normal mode, because the influence of noise and overhead is higher, which leads to less precise and stable results.
54-
55-
If it is impossible to increase the run time of a query and it is supposed to complete "immediately", you have to explicitly mark this in the test. To do so, add a `short` attribute to the query tag in the test file: `<query short="1">...`. The value of the `short` attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
56-
57-
This table shows queries for which the `short` marking is not consistent with the actual query run time -- i.e., a query runs for a normal time but is marked as `short`, or it runs faster than normal but is not marked as `short`.
58-
5953
#### Partial Queries
6054
Action required for the cells marked in red.
6155

docker/test/performance-comparison/compare.sh

Lines changed: 1 addition & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,6 @@ do
376376
sed -n "s/^report-threshold\t/$test_name\t/p" < "$test_file" >> "analyze/report-thresholds.tsv"
377377
sed -n "s/^skipped\t/$test_name\t/p" < "$test_file" >> "analyze/skipped-tests.tsv"
378378
sed -n "s/^display-name\t/$test_name\t/p" < "$test_file" >> "analyze/query-display-names.tsv"
379-
sed -n "s/^short\t/$test_name\t/p" < "$test_file" >> "analyze/marked-short-queries.tsv"
380379
sed -n "s/^partial\t/$test_name\t/p" < "$test_file" >> "analyze/partial-queries.tsv"
381380
done
382381

@@ -817,23 +816,18 @@ create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
817816
-- calculate and check the average query run time in the report.
818817
-- We have to be careful, because we will encounter:
819818
-- 1) partial queries which run only on one server
820-
-- 2) short queries which run for a much higher number of times
821819
-- 3) some errors that make query run for a different number of times on a
822820
-- particular server.
823821
--
824822
create view test_runs as
825823
select test,
826824
-- Default to 7 runs if there are only 'short' queries in the test, and
827825
-- we can't determine the number of runs.
828-
if((ceil(medianOrDefaultIf(t.runs, not short), 0) as r) != 0, r, 7) runs
826+
if((ceil(median(t.runs), 0) as r) != 0, r, 7) runs
829827
from (
830828
select
831829
-- The query id is the same for both servers, so no need to divide here.
832830
uniqExact(query_id) runs,
833-
(test, query_index) in
834-
(select * from file('analyze/marked-short-queries.tsv', TSV,
835-
'test text, query_index int'))
836-
as short,
837831
test, query_index
838832
from query_runs
839833
group by test, query_index
@@ -918,41 +912,6 @@ create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
918912
from queries order by test, query_index;
919913
920914
921-
-- Report of queries that have inconsistent 'short' markings:
922-
-- 1) have short duration, but are not marked as 'short'
923-
-- 2) the reverse -- marked 'short' but take too long.
924-
-- The threshold for 2) is significantly larger than the threshold for 1), to
925-
-- avoid jitter.
926-
create view shortness
927-
as select
928-
(test, query_index) in
929-
(select * from file('analyze/marked-short-queries.tsv', TSV,
930-
'test text, query_index int'))
931-
as marked_short,
932-
time, test, query_index, query_display_name
933-
from (
934-
select right time, test, query_index from queries
935-
union all
936-
select time_median, test, query_index from partial_query_times
937-
) times
938-
left join query_display_names
939-
on times.test = query_display_names.test
940-
and times.query_index = query_display_names.query_index
941-
;
942-
943-
create table inconsistent_short_marking_report
944-
engine File(TSV, 'report/unexpected-query-duration.tsv')
945-
as select
946-
multiIf(marked_short and time > 0.1, '\"short\" queries must run faster than 0.02 s',
947-
not marked_short and time < 0.02, '\"normal\" queries must run longer than 0.1 s',
948-
'') problem,
949-
marked_short, time,
950-
test, query_index, query_display_name
951-
from shortness
952-
where problem != ''
953-
;
954-
955-
956915
--------------------------------------------------------------------------------
957916
-- various compatibility data formats follow, not related to the main report
958917
@@ -1432,4 +1391,3 @@ esac
14321391
# Print some final debug info to help debug Weirdness, of which there is plenty.
14331392
jobs
14341393
pstree -apgT
1435-

docker/test/performance-comparison/perf.py

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -168,17 +168,12 @@ def substitute_parameters(query_templates, other_templates=[]):
168168

169169

170170
# Build a list of test queries, substituting parameters to query templates,
171-
# and reporting the queries marked as short.
172171
test_queries = []
173-
is_short = []
174172
for e in root.findall("query"):
175-
new_queries, [new_is_short] = substitute_parameters(
176-
[e.text], [[e.attrib.get("short", "0")]]
173+
new_queries = substitute_parameters(
174+
[e.text]
177175
)
178176
test_queries += new_queries
179-
is_short += [eval(s) for s in new_is_short]
180-
181-
assert len(test_queries) == len(is_short)
182177

183178
# If we're given a list of queries to run, check that it makes sense.
184179
for i in args.queries_to_run or []:
@@ -194,11 +189,6 @@ def substitute_parameters(query_templates, other_templates=[]):
194189
print(test_queries[i])
195190
exit(0)
196191

197-
# Print short queries
198-
for i, s in enumerate(is_short):
199-
if s:
200-
print(f"short\t{i}")
201-
202192
# If we're only asked to print the settings, do that and exit. These are settings
203193
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
204194
# '--max_memory_usage=10000000'.
@@ -458,27 +448,10 @@ def do_create(connection, index, queries):
458448
# already.
459449
run += 1
460450

461-
# Try to run any query for at least the specified number of times,
462-
# before considering other stop conditions.
463-
if run < args.runs:
464-
continue
465-
466-
# For very short queries we have a special mode where we run them for at
467-
# least some time. The recommended lower bound of run time for "normal"
468-
# queries is about 0.1 s, and we run them about 10 times, giving the
469-
# time per query per server of about one second. Run "short" queries
470-
# for longer time, because they have a high percentage of overhead and
471-
# might give less stable results.
472-
if is_short[query_index]:
473-
if server_seconds >= 8 * len(this_query_connections):
474-
break
475-
# Also limit the number of runs, so that we don't go crazy processing
476-
# the results -- 'eqmed.sql' is really suboptimal.
477-
if run >= 500:
478-
break
479-
else:
480-
if run >= args.runs:
481-
break
451+
# We break if all the min stop conditions are met (args.runs iterations)
452+
# and at least one of the max stop conditions is met (8 seconds or 500 iterations)
453+
if run >= args.runs and (server_seconds >= 8 * len(this_query_connections) or run >= 500):
454+
break
482455

483456
client_seconds = time.perf_counter() - start_seconds
484457
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")

docker/test/performance-comparison/report.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -393,20 +393,6 @@ def add_errors_explained():
393393
]
394394
)
395395

396-
unmarked_short_rows = tsvRows("report/unexpected-query-duration.tsv")
397-
error_tests += len(unmarked_short_rows)
398-
addSimpleTable(
399-
"Unexpected Query Duration",
400-
["Problem", 'Marked as "short"?', "Run time, s", "Test", "#", "Query"],
401-
unmarked_short_rows,
402-
)
403-
if unmarked_short_rows:
404-
errors_explained.append(
405-
[
406-
f'<a href="#{currentTableAnchor()}">Some queries have unexpected duration</a>'
407-
]
408-
)
409-
410396
def add_partial():
411397
rows = tsvRows("report/partial-queries-report.tsv")
412398
if not rows:

0 commit comments

Comments
 (0)