Skip to content

Commit 99f01f7

Browse files
committed
Convert and port 'pgSQL/aggregates_part1.sql' into UDF test base
1 parent f15102b commit 99f01f7

File tree

3 files changed

+544
-1
lines changed

3 files changed

+544
-1
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
--
2+
-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
3+
--
4+
--
5+
-- AGGREGATES [Part 1]
6+
-- https://github.com/postgres/postgres/blob/REL_12_BETA1/src/test/regress/sql/aggregates.sql#L1-L143
7+
8+
-- avoid bit-exact output here because operations may not be bit-exact.
9+
-- SET extra_float_digits = 0;
10+
11+
-- This test file was converted from pgSQL/aggregates_part1.sql.
12+
-- Note that currently registered UDF returns a string. So there are some differences, for instance
13+
-- in string cast within UDF in Scala and Python.
14+
15+
SELECT avg(udf(four)) AS avg_1 FROM onek;
16+
17+
SELECT udf(avg(a)) AS avg_32 FROM aggtest WHERE a < 100;
18+
19+
-- In 7.1, avg(float4) is computed using float8 arithmetic.
20+
-- Round the result to 3 digits to avoid platform-specific results.
21+
22+
select CAST(avg(udf(b)) AS Decimal(10,3)) AS avg_107_943 FROM aggtest;
23+
-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766]
24+
-- SELECT avg(gpa) AS avg_3_4 FROM ONLY student;
25+
26+
SELECT sum(udf(four)) AS sum_1500 FROM onek;
27+
SELECT udf(sum(a)) AS sum_198 FROM aggtest;
28+
SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest;
29+
-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766]
30+
-- SELECT sum(gpa) AS avg_6_8 FROM ONLY student;
31+
32+
SELECT udf(max(four)) AS max_3 FROM onek;
33+
SELECT max(udf(a)) AS max_100 FROM aggtest;
34+
SELECT CAST(udf(udf(max(aggtest.b))) AS int) AS max_324_78 FROM aggtest;
35+
-- `student` has a column with data type POINT, which is not supported by Spark [SPARK-27766]
36+
-- SELECT max(student.gpa) AS max_3_7 FROM student;
37+
38+
SELECT CAST(stddev_pop(udf(b)) AS int) FROM aggtest;
39+
SELECT udf(stddev_samp(b)) FROM aggtest;
40+
SELECT CAST(var_pop(udf(b)) as int) FROM aggtest;
41+
SELECT udf(var_samp(b)) FROM aggtest;
42+
43+
SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest;
44+
SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest;
45+
SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest;
46+
SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest;
47+
48+
-- population variance is defined for a single tuple, sample variance
49+
-- is not
50+
SELECT udf(var_pop(1.0)), var_samp(udf(2.0));
51+
SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0)));
52+
53+
54+
-- verify correct results for null and NaN inputs
55+
select sum(udf(CAST(null AS int))) from range(1,4);
56+
select sum(udf(CAST(null AS long))) from range(1,4);
57+
select sum(udf(CAST(null AS Decimal(38,0)))) from range(1,4);
58+
select sum(udf(CAST(null AS DOUBLE))) from range(1,4);
59+
select avg(udf(CAST(null AS int))) from range(1,4);
60+
select avg(udf(CAST(null AS long))) from range(1,4);
61+
select avg(udf(CAST(null AS Decimal(38,0)))) from range(1,4);
62+
select avg(udf(CAST(null AS DOUBLE))) from range(1,4);
63+
select sum(CAST(udf('NaN') AS DOUBLE)) from range(1,4);
64+
select avg(CAST(udf('NaN') AS DOUBLE)) from range(1,4);
65+
66+
-- [SPARK-27768] verify correct results for infinite inputs
67+
-- [SPARK-28291] UDFs cannot be evaluated within inline table definition
68+
-- SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE))
69+
-- FROM (VALUES (CAST(udf('1') AS DOUBLE)), (CAST(udf('Infinity') AS DOUBLE))) v(x);
70+
SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
71+
FROM (VALUES ('Infinity'), ('1')) v(x);
72+
SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
73+
FROM (VALUES ('Infinity'), ('Infinity')) v(x);
74+
SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE))
75+
FROM (VALUES ('-Infinity'), ('Infinity')) v(x);
76+
77+
78+
-- test accuracy with a large input offset
79+
SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE)))
80+
FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x);
81+
SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE)))
82+
FROM (VALUES (7000000000005), (7000000000007)) v(x);
83+
84+
-- SQL2003 binary aggregates [SPARK-23907]
85+
-- SELECT regr_count(b, a) FROM aggtest;
86+
-- SELECT regr_sxx(b, a) FROM aggtest;
87+
-- SELECT regr_syy(b, a) FROM aggtest;
88+
-- SELECT regr_sxy(b, a) FROM aggtest;
89+
-- SELECT regr_avgx(b, a), regr_avgy(b, a) FROM aggtest;
90+
-- SELECT regr_r2(b, a) FROM aggtest;
91+
-- SELECT regr_slope(b, a), regr_intercept(b, a) FROM aggtest;
92+
SELECT CAST(udf(covar_pop(b, udf(a))) AS int), CAST(covar_samp(udf(b), a) as int) FROM aggtest;
93+
SELECT corr(b, udf(a)) FROM aggtest;
94+
95+
96+
-- test accum and combine functions directly [SPARK-23907]
97+
-- CREATE TABLE regr_test (x float8, y float8);
98+
-- INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
99+
-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
100+
-- FROM regr_test WHERE x IN (10,20,30,80);
101+
-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
102+
-- FROM regr_test;
103+
-- SELECT float8_accum('{4,140,2900}'::float8[], 100);
104+
-- SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
105+
-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
106+
-- FROM regr_test WHERE x IN (10,20,30);
107+
-- SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
108+
-- FROM regr_test WHERE x IN (80,100);
109+
-- SELECT float8_combine('{3,60,200}'::float8[],ELECT CAST(udf(covar_pop(b, udf(a))) AS '{0,0,0}'::float8[]);
110+
-- SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
111+
-- SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
112+
-- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
113+
-- '{0,0,0,0,0,0}'::float8[]);
114+
-- SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
115+
-- '{2,180,200,740,57800,-3400}'::float8[]);
116+
-- SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
117+
-- '{2,180,200,740,57800,-3400}'::float8[]);
118+
-- DROP TABLE regr_test;
119+
120+
121+
-- test count, distinct
122+
SELECT count(udf(four)) AS cnt_1000 FROM onek;
123+
SELECT udf(count(DISTINCT four)) AS cnt_4 FROM onek;
124+
125+
select ten, udf(count(*)), sum(udf(four)) from onek
126+
group by ten order by ten;
127+
128+
select ten, count(udf(four)), udf(sum(DISTINCT four)) from onek
129+
group by ten order by ten;
130+
131+
-- user-defined aggregates
132+
-- SELECT newavg(four) AS avg_1 FROM onek;
133+
-- SELECT newsum(four) AS sum_1500 FROM onek;
134+
-- SELECT newcnt(four) AS cnt_1000 FROM onek;
135+
-- SELECT newcnt(*) AS cnt_1000 FROM onek;
136+
-- SELECT oldcnt(*) AS cnt_1000 FROM onek;
137+
-- SELECT sum2(q1,q2) FROM int8_tbl;
138+
139+
-- test for outer-level aggregates
140+
141+
-- this should work
142+
select ten, udf(sum(distinct four)) from onek a
143+
group by ten
144+
having exists (select 1 from onek b where udf(sum(distinct a.four)) = b.four);
145+
146+
-- this should fail because subquery has an agg of its own in WHERE
147+
select ten, sum(distinct four) from onek a
148+
group by ten
149+
having exists (select 1 from onek b
150+
where sum(distinct a.four + b.four) = udf(b.four));
151+
152+
-- [SPARK-27769] Test handling of sublinks within outer-level aggregates.
153+
-- Per bug report from Daniel Grace.
154+
select
155+
(select udf(max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))))
156+
from tenk1 o;

0 commit comments

Comments
 (0)