Skip to content

Commit b952b41

Browse files
authored
Python TextIO Performance Test (#23951)
* Python TextIO Performance Test * Add filebasedio_perf_test module as a unified test framework for Python file-based IOs * Fix MetricsReader publishing metrics more than once when more than one load test is declared; this happened because MetricsReader.publishers was a static class variable * Fix pylint * Distribute Python performance tests at random times throughout the day instead of running them all at 3 PM * Add information about length conversion
1 parent 017f2cb commit b952b41

9 files changed

Lines changed: 410 additions & 13 deletions

.test-infra/jenkins/job_PerformanceTests_BigQueryIO_Python.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ PhraseTriggeringPostCommitBuilder.postCommitJob(
9090
executeJob(delegate, bqio_read_test)
9191
}
9292

93-
CronJobBuilder.cronJob('beam_PerformanceTests_BiqQueryIO_Read_Python', 'H 15 * * *', this) {
93+
CronJobBuilder.cronJob('beam_PerformanceTests_BiqQueryIO_Read_Python', 'H H * * *', this) {
9494
executeJob(delegate, bqio_read_test)
9595
}
9696

@@ -103,6 +103,6 @@ PhraseTriggeringPostCommitBuilder.postCommitJob(
103103
executeJob(delegate, bqio_write_test)
104104
}
105105

106-
CronJobBuilder.cronJob('beam_PerformanceTests_BiqQueryIO_Write_Python_Batch', 'H 15 * * *', this) {
106+
CronJobBuilder.cronJob('beam_PerformanceTests_BiqQueryIO_Write_Python_Batch', 'H H * * *', this) {
107107
executeJob(delegate, bqio_write_test)
108108
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import CommonJobProperties as common
20+
import LoadTestsBuilder as loadTestsBuilder
21+
import InfluxDBCredentialsHelper
22+
23+
def now = new Date().format("MMddHHmmss", TimeZone.getTimeZone('UTC'))
24+
25+
def jobs = [
26+
[
27+
name : 'beam_PerformanceTests_TextIOIT_Python',
28+
description : 'Runs performance tests for Python TextIOIT',
29+
test : 'apache_beam.io.filebasedio_perf_test',
30+
githubTitle : 'Python TextIO Performance Test',
31+
githubTriggerPhrase: 'Run Python TextIO Performance Test',
32+
pipelineOptions : [
33+
publish_to_big_query : true,
34+
metrics_dataset : 'beam_performance',
35+
metrics_table : 'python_textio_1GB_results',
36+
influx_measurement : 'python_textio_1GB_results',
37+
test_class : 'TextIOPerfTest',
38+
input_options : '\'{' +
39+
'"num_records": 25000000,' +
40+
'"key_size": 9,' +
41+
'"value_size": 21}\'',
42+
dataset_size : '1050000000',
43+
num_workers : '5',
44+
autoscaling_algorithm: 'NONE'
45+
]
46+
]
47+
]
48+
49+
jobs.findAll {
50+
it.name in [
51+
'beam_PerformanceTests_TextIOIT_Python',
52+
]
53+
}.forEach { testJob -> createGCSFileBasedIOITTestJob(testJob) }
54+
55+
private void createGCSFileBasedIOITTestJob(testJob) {
56+
job(testJob.name) {
57+
description(testJob.description)
58+
common.setTopLevelMainJobProperties(delegate)
59+
common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase)
60+
common.setAutoJob(delegate, 'H H * * *')
61+
InfluxDBCredentialsHelper.useCredentials(delegate)
62+
additionalPipelineArgs = [
63+
influxDatabase: InfluxDBCredentialsHelper.InfluxDBDatabaseName,
64+
influxHost: InfluxDBCredentialsHelper.InfluxDBHostUrl,
65+
]
66+
testJob.pipelineOptions.putAll(additionalPipelineArgs)
67+
68+
def dataflowSpecificOptions = [
69+
runner : 'DataflowRunner',
70+
project : 'apache-beam-testing',
71+
region : 'us-central1',
72+
temp_location : 'gs://temp-storage-for-perf-tests/',
73+
filename_prefix : "gs://temp-storage-for-perf-tests/${testJob.name}/\${BUILD_ID}/",
74+
]
75+
76+
Map allPipelineOptions = dataflowSpecificOptions << testJob.pipelineOptions
77+
78+
loadTestsBuilder.loadTest(
79+
delegate, testJob.name, CommonTestProperties.Runner.DATAFLOW, CommonTestProperties.SDK.PYTHON, allPipelineOptions, testJob.test)
80+
}
81+
}

.test-infra/jenkins/job_PerformanceTests_PubsubIO_Python.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,6 @@ PhraseTriggeringPostCommitBuilder.postCommitJob(
7070
executeJob(delegate, psio_test)
7171
}
7272

73-
CronJobBuilder.cronJob('beam_PerformanceTests_PubsubIOIT_Python_Streaming', 'H 15 * * *', this) {
73+
CronJobBuilder.cronJob('beam_PerformanceTests_PubsubIOIT_Python_Streaming', 'H H * * *', this) {
7474
executeJob(delegate, psio_test)
7575
}

.test-infra/jenkins/job_PerformanceTests_SpannerIO_Python.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ PhraseTriggeringPostCommitBuilder.postCommitJob(
9292
executeJob(delegate, spannerio_read_test_2gb)
9393
}
9494

95-
CronJobBuilder.cronJob('beam_PerformanceTests_SpannerIO_Read_2GB_Python', 'H 15 * * *', this) {
95+
CronJobBuilder.cronJob('beam_PerformanceTests_SpannerIO_Read_2GB_Python', 'H H * * *', this) {
9696
executeJob(delegate, spannerio_read_test_2gb)
9797
}
9898

@@ -105,6 +105,6 @@ PhraseTriggeringPostCommitBuilder.postCommitJob(
105105
executeJob(delegate, spannerio_write_test_2gb)
106106
}
107107

108-
CronJobBuilder.cronJob('beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch', 'H 15 * * *', this) {
108+
CronJobBuilder.cronJob('beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch', 'H H * * *', this) {
109109
executeJob(delegate, spannerio_write_test_2gb)
110110
}

.test-infra/metrics/grafana/dashboards/perftests_metrics/Python_IO_IT_Tests_Dataflow.json

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,128 @@
482482
"align": false,
483483
"alignLevel": null
484484
}
485+
},
486+
{
487+
"aliasColors": {},
488+
"bars": false,
489+
"cacheTimeout": null,
490+
"dashLength": 10,
491+
"dashes": false,
492+
"datasource": "BeamInfluxDB",
493+
"fill": 1,
494+
"fillGradient": 0,
495+
"gridPos": {
496+
"h": 9,
497+
"w": 12,
498+
"x": 12,
499+
"y": 9
500+
},
501+
"hiddenSeries": false,
502+
"id": 6,
503+
"interval": "24h",
504+
"legend": {
505+
"avg": false,
506+
"current": false,
507+
"max": false,
508+
"min": false,
509+
"show": false,
510+
"total": false,
511+
"values": false
512+
},
513+
"lines": true,
514+
"linewidth": 2,
515+
"links": [],
516+
"nullPointMode": "connected",
517+
"options": {
518+
"dataLinks": []
519+
},
520+
"percentage": false,
521+
"pluginVersion": "6.7.2",
522+
"pointradius": 2,
523+
"points": true,
524+
"renderer": "flot",
525+
"seriesOverrides": [],
526+
"spaceLength": 10,
527+
"stack": false,
528+
"steppedLine": false,
529+
"targets": [
530+
{
531+
"alias": "$tag_metric",
532+
"groupBy": [
533+
{
534+
"params": [
535+
"$__interval"
536+
],
537+
"type": "time"
538+
}
539+
],
540+
"measurement": "",
541+
"orderByTime": "ASC",
542+
"policy": "default",
543+
"query": "SELECT mean(\"value\") FROM \"python_textio_1GB_results\" WHERE \"metric\" = 'read_runtime' OR \"metric\" = 'write_runtime' AND $timeFilter GROUP BY time($__interval), \"metric\"",
544+
"rawQuery": true,
545+
"refId": "A",
546+
"resultFormat": "time_series",
547+
"select": [
548+
[
549+
{
550+
"params": [
551+
"value"
552+
],
553+
"type": "field"
554+
},
555+
{
556+
"params": [],
557+
"type": "mean"
558+
}
559+
]
560+
],
561+
"tags": []
562+
}
563+
],
564+
"thresholds": [],
565+
"timeFrom": null,
566+
"timeRegions": [],
567+
"timeShift": null,
568+
"title": "TextIO | GCS | 1 GB",
569+
"tooltip": {
570+
"shared": true,
571+
"sort": 0,
572+
"value_type": "individual"
573+
},
574+
"transparent": true,
575+
"type": "graph",
576+
"xaxis": {
577+
"buckets": null,
578+
"mode": "time",
579+
"name": null,
580+
"show": true,
581+
"values": []
582+
},
583+
"yaxes": [
584+
{
585+
"$$hashKey": "object:403",
586+
"format": "s",
587+
"label": null,
588+
"logBase": 1,
589+
"max": null,
590+
"min": null,
591+
"show": true
592+
},
593+
{
594+
"$$hashKey": "object:404",
595+
"format": "short",
596+
"label": null,
597+
"logBase": 1,
598+
"max": null,
599+
"min": null,
600+
"show": true
601+
}
602+
],
603+
"yaxis": {
604+
"align": false,
605+
"alignLevel": null
606+
}
485607
}
486608
],
487609
"schemaVersion": 22,

0 commit comments

Comments
 (0)