Skip to content

Commit 2df1648

Browse files
author
Rong Rong
committed
add theta sketch integration tests
1 parent 0598955 commit 2df1648

File tree

1 file changed

+65
-43
lines changed

1 file changed

+65
-43
lines changed

pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/ThetaSketchIntegrationTest.java

Lines changed: 65 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -255,15 +255,15 @@ public void testThetaSketchQueryV2(boolean useMultiStageQueryEngine)
255255
int expected = 50 + 60 + 70 + 110 + 120 + 130;
256256
runAndAssert(query, expected);
257257

258-
/*
259-
query = "select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Female''', "
260-
+ "'$1') from " + DEFAULT_TABLE_NAME;
258+
query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
259+
+ " FILTER (WHERE dimName = 'gender' and dimValue = 'Female')) from " + DEFAULT_TABLE_NAME;
261260
runAndAssert(query, expected);
262261

263-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
264-
+ "'dimName = ''gender''', 'dimValue = ''Female''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
262+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
263+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
264+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'))) "
265+
+ " FROM " + DEFAULT_TABLE_NAME;
265266
runAndAssert(query, expected);
266-
*/
267267
}
268268

269269
// gender = male
@@ -273,16 +273,15 @@ public void testThetaSketchQueryV2(boolean useMultiStageQueryEngine)
273273
int expected = 80 + 90 + 100 + 140 + 150 + 160;
274274
runAndAssert(query, expected);
275275

276-
/*
277-
query =
278-
"select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''gender'' and dimValue = ''Male''', '$1') "
279-
+ "from " + DEFAULT_TABLE_NAME;
276+
query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
277+
+ " FILTER (WHERE dimName = 'gender' and dimValue = 'Male')) from " + DEFAULT_TABLE_NAME;
280278
runAndAssert(query, expected);
281279

282-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
283-
+ "'dimName = ''gender''', 'dimValue = ''Male''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
280+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
281+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'),"
282+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male'))) "
283+
+ " FROM " + DEFAULT_TABLE_NAME;
284284
runAndAssert(query, expected);
285-
*/
286285
}
287286

288287
// course = math
@@ -292,66 +291,89 @@ public void testThetaSketchQueryV2(boolean useMultiStageQueryEngine)
292291
int expected = 50 + 80 + 110 + 140;
293292
runAndAssert(query, expected);
294293

295-
/*
296-
query =
297-
"select distinctCountThetaSketch(thetaSketchCol, '', 'dimName = ''course'' and dimValue = ''Math''', '$1') "
298-
+ "from " + DEFAULT_TABLE_NAME;
294+
query = "select getThetaSketchEstimate(distinctCountRAWThetaSketch(thetaSketchCol)"
295+
+ " FILTER (WHERE dimName = 'course' and dimValue = 'Math')) from " + DEFAULT_TABLE_NAME;
299296
runAndAssert(query, expected);
300297

301-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
302-
+ "'dimName = ''course''', 'dimValue = ''Math''', 'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
298+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
299+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'),"
300+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
301+
+ " FROM " + DEFAULT_TABLE_NAME;
303302
runAndAssert(query, expected);
304-
*/
305303
}
306304

307-
/*
308305
// gender = female INTERSECT course = math
309306
{
310-
String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
311-
+ "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''Math''', "
312-
+ "'SET_INTERSECT($1, $2)') from " + DEFAULT_TABLE_NAME;
307+
String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
308+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
309+
+ " WHERE dimName = 'gender' and dimValue = 'Female'), "
310+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
311+
+ " WHERE dimName = 'course' and dimValue = 'Math'))) "
312+
+ " FROM " + DEFAULT_TABLE_NAME;
313313
int expected = 50 + 110;
314314
runAndAssert(query, expected);
315315

316-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
317-
+ "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
318-
+ "'SET_INTERSECT($1, $2, $3, $4)') from " + DEFAULT_TABLE_NAME;
316+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT( "
317+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
318+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female'), "
319+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
320+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math'))) "
321+
+ " FROM " + DEFAULT_TABLE_NAME;
319322
runAndAssert(query, expected);
320323

321-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
322-
+ "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''Math''', "
323-
+ "'SET_INTERSECT(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
324+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_INTERSECT(THETA_SKETCH_INTERSECT("
325+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
326+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
327+
+ " THETA_SKETCH_INTERSECT("
328+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
329+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Math')))) "
330+
+ " FROM " + DEFAULT_TABLE_NAME;
324331
runAndAssert(query, expected);
325332
}
326333

327334
// gender = male UNION course = biology
328335
{
329-
String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
330-
+ "'dimName = ''gender'' and dimValue = ''Male''', 'dimName = ''course'' and dimValue = ''Biology''', "
331-
+ "'SET_UNION($1, $2)') from " + DEFAULT_TABLE_NAME;
336+
String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION( "
337+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
338+
+ " WHERE dimName = 'gender' and dimValue = 'Male'), "
339+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
340+
+ " WHERE dimName = 'course' and dimValue = 'Biology'))) "
341+
+ " FROM " + DEFAULT_TABLE_NAME;
332342
int expected = 70 + 80 + 90 + 100 + 130 + 140 + 150 + 160;
333343
runAndAssert(query, expected);
334344

335-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
336-
+ "'dimName = ''gender''', 'dimValue = ''Male''', 'dimName = ''course''', 'dimValue = ''Biology''', "
337-
+ "'SET_UNION(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
345+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_UNION("
346+
+ " THETA_SKETCH_INTERSECT("
347+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
348+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Male')), "
349+
+ " THETA_SKETCH_INTERSECT("
350+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
351+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Biology')))) "
352+
+ " FROM " + DEFAULT_TABLE_NAME;
338353
runAndAssert(query, expected);
339354
}
340355

341356
// gender = female DIFF course = history
342357
{
343-
String query = "select distinctCountThetaSketch(thetaSketchCol, '', "
344-
+ "'dimName = ''gender'' and dimValue = ''Female''', 'dimName = ''course'' and dimValue = ''History''', "
345-
+ "'SET_DIFF($1, $2)') from " + DEFAULT_TABLE_NAME;
358+
String query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF( "
359+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
360+
+ " WHERE dimName = 'gender' and dimValue = 'Female'), "
361+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER ("
362+
+ " WHERE dimName = 'course' and dimValue = 'History'))) "
363+
+ " FROM " + DEFAULT_TABLE_NAME;
346364
int expected = 50 + 110 + 70 + 130;
347365
runAndAssert(query, expected);
348366

349-
query = "select distinctCountThetaSketch(thetaSketchCol, '', "
350-
+ "'dimName = ''gender''', 'dimValue = ''Female''', 'dimName = ''course''', 'dimValue = ''History''', "
351-
+ "'SET_DIFF(SET_INTERSECT($1, $2), SET_INTERSECT($3, $4))') from " + DEFAULT_TABLE_NAME;
367+
query = "select GET_THETA_SKETCH_ESTIMATE(THETA_SKETCH_DIFF("
368+
+ " THETA_SKETCH_INTERSECT("
369+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'gender'), "
370+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'Female')), "
371+
+ " THETA_SKETCH_INTERSECT("
372+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimName = 'course'), "
373+
+ " DISTINCT_COUNT_RAW_THETA_SKETCH(thetaSketchCol, '') FILTER (WHERE dimValue = 'History')))) "
374+
+ " FROM " + DEFAULT_TABLE_NAME;
352375
runAndAssert(query, expected);
353376
}
354-
*/
355377

356378
// group by gender
357379
{

0 commit comments

Comments
 (0)