Skip to content

Commit bb0b08e

Browse files
author
darkhan.nausharipov
committed
Merge branch 'master' into issue24563_tobf_gradle
2 parents 6ebe89d + 95e5391 commit bb0b08e

176 files changed

Lines changed: 17902 additions & 1351 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/playground_examples_ci_reusable.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ jobs:
9191
working-directory: playground/infrastructure
9292
env:
9393
BEAM_ROOT_DIR: "../.."
94+
BEAM_EXAMPLE_CATEGORIES: "../categories.yaml"
9495

9596
ci_cd:
9697
name: ${{ inputs.step }} ${{ inputs.sdk }} ${{ inputs.origin }}

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ website/www/yarn-error.log
127127
**/.packages
128128
**/generated_plugin_registrant.dart
129129
playground/frontend/playground_components/pubspec.lock
130+
playground/frontend/playground_components/test/tools/extract_symbols_java/dependencies
131+
playground/frontend/playground_components_dev/pubspec.lock
130132

131133
# Ignore Beam Playground Terraform
132134
**/.terraform
@@ -136,4 +138,4 @@ playground/frontend/playground_components/pubspec.lock
136138
**/*.tfvars
137139

138140
# Ignore Katas auto-generated files
139-
**/*-remote-info.yaml
141+
**/*-remote-info.yaml
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import CommonJobProperties as commonJobProperties
20+
import PhraseTriggeringPostCommitBuilder
21+
import CronJobBuilder
22+
23+
// Job body for the CloudML/TFT Criteo benchmarks: applies the common Beam
// Jenkins job settings, then runs the Dataflow TFT test suite via Gradle.
def cloudMLJob = { scope ->
  scope.description('Runs the TFT Criteo Examples on the Dataflow runner.')

  // Set common parameters (branch 'master'; 360 is presumably the timeout
  // in minutes — TODO confirm against CommonJobProperties).
  commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 360)

  // Gradle goals for this job.
  scope.steps {
    gradle {
      rootBuildScriptDir(commonJobProperties.checkoutDir)
      commonJobProperties.setGradleSwitches(delegate)
      tasks(':sdks:python:test-suites:dataflow:tftTests')
    }
  }
}
38+
39+
// Phrase-triggered job: commenting the trigger phrase on a PR runs the
// TFT Criteo benchmarks on Dataflow.
// Fix: the GitHub UI hint string had garbled, unbalanced quoting
// ("Dataflow(\"Run TFT Criteo Benchmarks"\"")"), which is not even a valid
// Groovy literal; restore the conventional 'description ("trigger phrase")'
// form used by other phrase-triggered Beam jobs.
PhraseTriggeringPostCommitBuilder.postCommitJob(
    'beam_CloudML_Benchmarks_Dataflow',
    'Run TFT Criteo Benchmarks',
    'TFT Criteo benchmarks on Dataflow ("Run TFT Criteo Benchmarks")',
    this
) {
  cloudMLJob(delegate)
}
47+
48+
// Scheduled run of the same benchmark job: once per day during hour 14,
// on a minute chosen by Jenkins' hash ('H') to spread load.
CronJobBuilder.cronJob(
    'beam_CloudML_Benchmarks_Dataflow',
    'H 14 * * *',
    this
) {
  cloudMLJob(delegate)
}

.test-infra/jenkins/job_LoadTests_Combine_Python.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,7 @@ def loadTestConfigurations = { datasetName, mode ->
100100

101101
def addStreamingOptions(test){
102102
test.pipelineOptions << [streaming: null,
103-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
104-
experiments: "use_runner_v2,shuffle_mode=appliance"
103+
experiments: "use_runner_v2"
105104
]
106105
}
107106

.test-infra/jenkins/job_LoadTests_GBK_Python.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ def addStreamingOptions(test) {
156156
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
157157
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
158158
// for more details.
159-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
160-
experiments: 'use_runner_v2,shuffle_mode=appliance',
159+
experiments: 'use_runner_v2',
161160
]
162161
}
163162

.test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ def addStreamingOptions(test) {
8686
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
8787
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
8888
// for more details.
89-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
90-
experiments: 'use_runner_v2,shuffle_mode=appliance',
89+
experiments: 'use_runner_v2',
9190
]
9291
}
9392

.test-infra/jenkins/job_LoadTests_ParDo_Python.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,7 @@ def addStreamingOptions(test) {
131131
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
132132
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
133133
// for more details.
134-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
135-
experiments: 'use_runner_v2,shuffle_mode=appliance',
134+
experiments: 'use_runner_v2',
136135
]
137136
}
138137

.test-infra/jenkins/job_LoadTests_SideInput_Python.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def fromTemplate = { mode, name, id, datasetName, testSpecificOptions ->
3939
influx_measurement : "python_${mode}_sideinput_${id}",
4040
num_workers : 10,
4141
autoscaling_algorithm: 'NONE',
42-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
43-
experiments : 'use_runner_v2,shuffle_mode=appliance',
42+
experiments : 'use_runner_v2',
4443
] << testSpecificOptions
4544
]
4645
}

.test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ job(jobName) {
106106
readTimeout : '1500',
107107
bigQueryTable : 'kafkaioit_results_runner_v2',
108108
influxMeasurement : 'kafkaioit_results_runner_v2',
109-
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
110-
experiments : 'use_runner_v2,shuffle_mode=appliance,use_unified_worker',
109+
experiments : 'use_runner_v2,use_unified_worker',
111110
]
112111

113112
steps {
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
'''
12+
This module queries GitHub to collect Beam-related workflows metrics and put them in
13+
PostgreSQL.
14+
This Script is running every 3 hours in a cloud function in apache-beam-testing project.
15+
This cloud function is triggered by a pubsub topic.
16+
You can find the cloud function in the next link
17+
https://console.cloud.google.com/functions/details/us-central1/github_actions_workflows_dashboard_sync?env=gen1&project=apache-beam-testing
18+
Pub sub topic : https://console.cloud.google.com/cloudpubsub/topic/detail/github_actions_workflows_sync?project=apache-beam-testing
19+
Cron Job : https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_actions_workflows_dashboard_sync?project=apache-beam-testing
20+
Writing the latest 10 runs of every postcommit workflow in master branch in a beammetrics database
21+
'''
22+
23+
import os
24+
import sys
25+
import time
26+
import re
27+
import requests
28+
import psycopg2
29+
30+
from datetime import datetime
31+
from github import GithubIntegration
32+
33+
# Connection settings for the beammetrics PostgreSQL database, injected
# through the Cloud Function's environment variables.
DB_HOST = os.environ['DB_HOST']
DB_PORT = os.environ['DB_PORT']
DB_NAME = os.environ['DB_NAME']
DB_USER_NAME = os.environ['DB_USER']
DB_PASSWORD = os.environ['DB_PASS']
GH_WORKFLOWS_TABLE_NAME = "github_workflows"
# Number of workflows that fetch github API
GH_NUMBER_OF_WORKFLOWS = 100
# Runs stored per workflow; one (run, runId) column pair per run.
GH_WORKFLOWS_NUMBER_EXECUTIONS = 100
# Populated by fetchWorkflowData(), read by databaseOperations().
WORKFLOWS_OBJECT_LIST = []
43+
44+
45+
class Workflow:
    """One GitHub Actions workflow plus the run data collected for it."""

    def __init__(self, id, name, filename):
        # Identifier, display name and .yml file name as reported by the API.
        # (`id` deliberately mirrors the GitHub API field name.)
        self.id, self.name, self.filename = id, name, filename
        # Filled in by fetchWorkflowData(): run conclusions/statuses and the
        # matching HTML URLs, kept index-aligned.
        self.listOfRuns = []
        self.runUrl = []
52+
53+
# Build the CREATE TABLE statement: one row per workflow, with one
# (runN, runNId) text column pair for each of the
# GH_WORKFLOWS_NUMBER_EXECUTIONS stored runs.
# Fix: the old comment said "latest ten" while the constant is 100, and the
# loop reassigned its own index (`i = i + 1` inside `for i in range(...)`);
# iterate 1..N directly instead.
GH_WORKFLOWS_CREATE_TABLE_QUERY = f"""
CREATE TABLE IF NOT EXISTS {GH_WORKFLOWS_TABLE_NAME} (
job_name text PRIMARY KEY,
job_yml_filename text"""
for i in range(1, GH_WORKFLOWS_NUMBER_EXECUTIONS + 1):
    GH_WORKFLOWS_CREATE_TABLE_QUERY += """,\n run{} text,
    run{}Id text""".format(i, i)
GH_WORKFLOWS_CREATE_TABLE_QUERY += ")\n"
63+
64+
def githubWorkflowsGrafanaSync(data, context):
    '''Cloud Function entry point: refresh the workflow-runs table.

    ``data`` and ``context`` mirror the Pub/Sub trigger signature and are
    unused. Returns the string "Completed" on success.
    '''
    print('Started')
    print('Updating table with recent workflow runs')
    connection = initDbConnection()
    workflows = fetchWorkflowData()
    databaseOperations(connection, workflows)
    print('Done')
    return "Completed"
70+
71+
def initDbConnection():
    '''Open a connection to the metrics database, retrying up to 3 times.

    Sleeps one minute between attempts; exits the process when every
    attempt fails. Returns an open psycopg2 connection.

    Fix: the old loop incremented the attempt counter even on success, so a
    connection established on the final attempt still tripped the
    ``i >= maxRetries`` check and called sys.exit(1). Returning directly
    from the successful attempt removes that failure mode (and the
    ``connection == None`` comparison along with it).
    '''
    maxRetries = 3
    for _ in range(maxRetries):
        try:
            return psycopg2.connect(
                f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
                f" port='{DB_PORT}' password='{DB_PASSWORD}'")
        except Exception as e:
            print('Failed to connect to DB; retrying in 1 minute')
            print(e)
            time.sleep(60)
    print("Number of retries exceded ")
    sys.exit(1)
90+
91+
def getToken():
    '''Return an installation access token for the configured GitHub App.

    Reads the app id, PEM key and installation id from the environment.
    '''
    app = GithubIntegration(
        os.environ["GH_APP_ID"],
        os.environ["GH_PEM_KEY"])
    access = app.get_access_token(os.environ["GH_APP_INSTALLATION_ID"])
    return access.token
99+
100+
def retriesRequest(request):
    '''Back off on a non-200 response, exiting once ~1 hour has been spent.

    NOTE(review): ``request`` is an already-completed Response object; its
    status_code can never change inside this loop, so the HTTP call is never
    actually re-issued. A non-200 response therefore always ends in
    sys.exit(1) after the cumulative sleeps. Consider passing a callable
    that re-sends the request instead — verify with callers before changing.
    '''
    requestSucceeded = False
    retryFactor = 1
    while not requestSucceeded:
        # Exponential backoff: 60s, then 120s, 240s, ... between checks.
        retryTime = 60 * retryFactor
        if request.status_code != 200:
            print('Failed to get the request with code {}'.format(request.status_code))
            time.sleep(retryTime)
            retryFactor = retryFactor + retryFactor
            # Give up once the next wait would push the total past an hour.
            if retryFactor * 60 >= 3600:
                print("Error: The request take more than an hour")
                sys.exit(1)
        else:
            requestSucceeded = True
114+
def fetchWorkflowData():
    '''Collect every postcommit workflow on master and its latest runs.

    Appends one Workflow object per postcommit workflow to the module-level
    WORKFLOWS_OBJECT_LIST, each padded to exactly
    GH_WORKFLOWS_NUMBER_EXECUTIONS (run, url) entries. Returns nothing.
    Errors are logged and swallowed so the scheduled invocation completes.

    Fix: the old pagination loop tested ``total_count >= GH_NUMBER_OF_WORKFLOWS``
    to decide whether to fetch another page — but total_count is the repo-wide
    constant, so once the repo had more workflows than one page the loop never
    terminated. Terminate on a short (or empty) page instead.
    '''
    workflows = []
    try:
        url = "https://api.github.com/repos/apache/beam/actions/workflows"
        page = 1
        while True:
            queryOptions = {'branch': 'master', 'page': page,
                            'per_page': GH_NUMBER_OF_WORKFLOWS}
            response = requests.get(url=url, params=queryOptions)
            retriesRequest(response)
            workflowsPage = response.json()['workflows']
            workflows.append(workflowsPage)
            # A page shorter than per_page is the last one.
            if len(workflowsPage) < GH_NUMBER_OF_WORKFLOWS:
                break
            page = page + 1

        # Keep only postcommit workflows; derive the .yml file name from
        # the workflow path.
        for pageItem in workflows:
            for item in pageItem:
                path = item['path']
                isPostCommit = re.search('(.*)postcommit(.*)', path)
                if isPostCommit:
                    result = re.search('/(.*).yml', path)
                    path = (result.group(1)) + ".yml"
                    WORKFLOWS_OBJECT_LIST.append(
                        Workflow(item['id'], item['name'], path))

        # Fetch the latest runs for each postcommit workflow.
        url = "https://api.github.com/repos/apache/beam/actions/workflows/"
        queryOptions = {'branch': 'master',
                        'per_page': GH_WORKFLOWS_NUMBER_EXECUTIONS,
                        'page': '1', 'exclude_pull_request': True}
        for workflow in WORKFLOWS_OBJECT_LIST:
            response = requests.get(url="{}{}/runs".format(url, workflow.id),
                                    params=queryOptions)
            retriesRequest(response)
            for item in response.json()['workflow_runs']:
                # Completed runs carry a conclusion; in-flight runs only a status.
                if item['status'] == 'completed':
                    workflow.listOfRuns.append(item['conclusion'])
                else:
                    workflow.listOfRuns.append(item['status'])
                workflow.runUrl.append(item['html_url'])
            # Pad to a fixed width so every DB row has the same column count.
            while len(workflow.listOfRuns) < GH_WORKFLOWS_NUMBER_EXECUTIONS:
                workflow.listOfRuns.append('None')
                workflow.runUrl.append('None')
    except Exception as e:
        print('Failed to get GHA workflows')
        print(e)
167+
168+
def databaseOperations(connection, fetchWorkflows):
    '''Create the table if it does not exist and replace its contents with
    the latest runs of the workflows in WORKFLOWS_OBJECT_LIST.

    ``fetchWorkflows`` is unused (the data arrives via the module-level
    list); kept to preserve the existing call signature. Commits and closes
    the connection.

    Fixes: (1) values were previously spliced into the INSERT statement by
    string formatting, so any quote in a workflow name produced broken SQL
    (an injection-shaped bug) — use driver-side parameter binding instead;
    (2) an empty WORKFLOWS_OBJECT_LIST produced the invalid statement
    "INSERT INTO ... VALUES ;" — skip the insert when there is nothing to
    write.
    '''
    cursor = connection.cursor()
    cursor.execute(GH_WORKFLOWS_CREATE_TABLE_QUERY)
    # The table always holds exactly the latest snapshot: wipe and rewrite.
    cursor.execute("DELETE FROM {};".format(GH_WORKFLOWS_TABLE_NAME))
    rows = []
    for workflow in WORKFLOWS_OBJECT_LIST:
        row = [workflow.name, workflow.filename]
        for run, runUrl in zip(workflow.listOfRuns, workflow.runUrl):
            row.append(run)
            row.append(runUrl)
        rows.append(row)
    if rows:
        # One %s placeholder per column; psycopg2 quotes the values.
        placeholders = "({})".format(",".join(["%s"] * len(rows[0])))
        cursor.executemany(
            "INSERT INTO {} VALUES {}".format(
                GH_WORKFLOWS_TABLE_NAME, placeholders),
            rows)
    cursor.close()
    connection.commit()
    connection.close()

0 commit comments

Comments
 (0)