Skip to content

Commit 16ee9e1

Browse files
authored
Merge pull request #622 from Altinity/23.8_automated_report
23.8 Automated Report and Known Fails
2 parents d130ef3 + 7de03f2 commit 16ee9e1

File tree

8 files changed

+538
-26
lines changed

8 files changed

+538
-26
lines changed
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import os
4+
from pathlib import Path
5+
from itertools import combinations
6+
import json
7+
8+
import requests
9+
from clickhouse_driver import Client
10+
import boto3
11+
from botocore.exceptions import NoCredentialsError
12+
13+
# Names of the environment variables that hold the checks database credentials.
DATABASE_HOST_VAR = "CHECKS_DATABASE_HOST"
DATABASE_USER_VAR = "CHECKS_DATABASE_USER"
DATABASE_PASSWORD_VAR = "CHECKS_DATABASE_PASSWORD"
# S3 bucket where CI artifacts (and the combined report) are stored.
S3_BUCKET = "altinity-build-artifacts"
17+
18+
19+
def get_checks_fails(client: Client, job_url: str):
    """Return tests that did not succeed for the given job URL.

    Checks with check_status 'error' are excluded here because they are
    reported separately by get_checks_errors.

    Args:
        client: ClickHouse client connected to the checks database.
        job_url: Value matched against `gh-data`.checks.task_url.

    Returns:
        pandas.DataFrame with check/test status columns and a results link.
    """
    columns = (
        "check_status, check_name, test_status, test_name, report_url as results_link"
    )
    # job_url is passed as a bound parameter (not interpolated into the SQL)
    # so quotes in the URL cannot break the query or inject SQL.
    query = f"""SELECT {columns} FROM `gh-data`.checks
        WHERE task_url=%(job_url)s
        AND test_status IN ('FAIL', 'ERROR')
        AND check_status!='error'
        ORDER BY check_name, test_name
        """
    return client.query_dataframe(query, {"job_url": job_url})
34+
35+
36+
def get_checks_known_fails(client: Client, job_url: str, known_fails: dict):
    """Return tests marked BROKEN that appear in the known-fails list.

    Args:
        client: ClickHouse client connected to the checks database.
        job_url: Value matched against `gh-data`.checks.task_url.
        known_fails: Mapping of test name -> details (may carry a "reason").
            Must be non-empty.

    Returns:
        pandas.DataFrame of matching rows with a "reason" column inserted
        before the results link.
    """
    assert len(known_fails) > 0, "cannot query the database with empty known fails"
    columns = (
        "check_status, check_name, test_status, test_name, report_url as results_link"
    )
    # Both the job URL and the test-name list are bound as parameters so
    # special characters cannot break the query or inject SQL.
    query = f"""SELECT {columns} FROM `gh-data`.checks
        WHERE task_url=%(job_url)s
        AND test_status='BROKEN'
        AND test_name IN %(known_tests)s
        ORDER BY test_name, check_name
        """
    df = client.query_dataframe(
        query, {"job_url": job_url, "known_tests": tuple(known_fails.keys())}
    )

    if len(df) == 0:
        return df

    # test_name comes back as a pandas categorical (use_numpy mode); drop
    # unused categories before mapping each name to its known-fail reason.
    df.insert(
        len(df.columns) - 1,
        "reason",
        df["test_name"]
        .cat.remove_unused_categories()
        .apply(
            lambda test_name: known_fails[test_name].get("reason", "No reason given")
        ),
    )

    return df
67+
68+
69+
def get_checks_errors(client: Client, job_url: str):
    """Return checks that have check_status 'error' for the given job URL.

    Args:
        client: ClickHouse client connected to the checks database.
        job_url: Value matched against `gh-data`.checks.task_url.

    Returns:
        pandas.DataFrame with check/test status columns and a results link.
    """
    columns = (
        "check_status, check_name, test_status, test_name, report_url as results_link"
    )
    # job_url is passed as a bound parameter (not interpolated into the SQL)
    # so quotes in the URL cannot break the query or inject SQL.
    query = f"""SELECT {columns} FROM `gh-data`.checks
        WHERE task_url=%(job_url)s
        AND check_status=='error'
        ORDER BY check_name, test_name
        """
    return client.query_dataframe(query, {"job_url": job_url})
82+
83+
84+
def drop_prefix_rows(df, column_to_clean):
    """Remove rows made redundant by a longer variant of the same row.

    A row is dropped when another row agrees with it on every column
    except *column_to_clean*, and its *column_to_clean* value is a prefix
    of that other row's value.
    """
    other_columns = [name for name in df.columns if name != column_to_clean]
    redundant = set()
    for (idx_a, row_a), (idx_b, row_b) in combinations(df.iterrows(), 2):
        # Only compare rows that are identical outside the cleaned column.
        if any(row_a[name] != row_b[name] for name in other_columns):
            continue
        value_a = row_a[column_to_clean]
        value_b = row_b[column_to_clean]
        if value_b.startswith(value_a):
            redundant.add(idx_a)
        elif value_a.startswith(value_b):
            redundant.add(idx_b)
    return df.drop(redundant)
99+
100+
101+
def get_regression_fails(client: Client, job_url: str):
    """Return regression tests that did not succeed for the given job URL.

    Args:
        client: ClickHouse client connected to the checks database.
        job_url: Value matched against clickhouse_regression_results.job_url.

    Returns:
        pandas.DataFrame of failing regression tests, with prefix-duplicate
        test names dropped and job names title-cased.
    """
    # If you rename the alias for report_url, also update the formatters
    # in format_results_as_html_table.
    # Nested SELECT handles test reruns via argMax(result, start_time).
    # NOTE(review): GROUP BY also includes start_time and report_url, which
    # keeps each rerun in its own group — confirm reruns collapse as intended.
    # job_url is bound as a parameter so quotes in it cannot break the query.
    query = """SELECT arch, job_name, status, test_name, results_link
        FROM (
            SELECT
                architecture as arch,
                test_name,
                argMax(result, start_time) AS status,
                job_url,
                job_name,
                report_url as results_link
            FROM `gh-data`.clickhouse_regression_results
            GROUP BY architecture, test_name, job_url, job_name, report_url, start_time
            ORDER BY start_time DESC, length(test_name) DESC
        )
        WHERE job_url=%(job_url)s
        AND status IN ('Fail', 'Error')
        """
    df = client.query_dataframe(query, {"job_url": job_url})
    df = drop_prefix_rows(df, "test_name")
    df["job_name"] = df["job_name"].str.title()
    return df
127+
128+
129+
def url_to_html_link(url: str) -> str:
    """Render *url* as an HTML anchor.

    The link text is the final path segment of the URL, falling back to
    "results" when the URL ends with a slash. A falsy URL yields "".
    """
    if not url:
        return ""
    label = url.rsplit("/", 1)[-1] or "results"
    return f'<a href="{url}">{label}</a>'
136+
137+
138+
def format_test_name_for_linewrap(text: str) -> str:
    """Rewrite pytest-style names ("file.py::test") as "file/test" so the
    browser can line-wrap them at the slash."""
    return "/".join(text.split(".py::"))
141+
142+
143+
def format_results_as_html_table(results) -> str:
    """Render a results table (DataFrame, or empty list) as HTML.

    Returns a placeholder paragraph when there is nothing to report.
    Column names are prettified, result links become anchors, test names
    are tweaked for line wrapping, and tbody/thead wrappers are stripped
    because they interfere with the table sorting script.
    """
    if len(results) == 0:
        return "<p>Nothing to report</p>"
    # Rename on a copy instead of assigning to results.columns, so the
    # caller's DataFrame is not mutated as a side effect.
    results = results.rename(
        columns={col: col.replace("_", " ").title() for col in results.columns}
    )
    html = (
        results.to_html(
            index=False,
            formatters={
                "Results Link": url_to_html_link,
                "Test Name": format_test_name_for_linewrap,
            },
            escape=False,
        )  # tbody/thead tags interfere with the table sorting script
        .replace("<tbody>\n", "")
        .replace("</tbody>\n", "")
        .replace("<thead>\n", "")
        .replace("</thead>\n", "")
        .replace('<table border="1"', '<table style="min-width: min(900px, 98vw);"')
    )
    return html
163+
164+
165+
def parse_args() -> argparse.Namespace:
    """Build and parse the command line for the combined CI report."""
    parser = argparse.ArgumentParser(description="Create a combined CI report.")
    # Required positioning arguments for locating the run and its S3 path.
    for flag, help_text in (
        ("--actions-run-url", "URL of the actions run"),
        ("--pr-number", "Pull request number for the S3 path"),
        ("--commit-sha", "Commit SHA for the S3 path"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    # Optional behavior switches.
    parser.add_argument(
        "--no-upload", action="store_true", help="Do not upload the report"
    )
    parser.add_argument(
        "--known-fails", type=str, help="Path to the file with known fails"
    )
    parser.add_argument(
        "--mark-preview", action="store_true", help="Mark the report as a preview"
    )
    return parser.parse_args()
186+
187+
188+
def main():
    """Build the combined CI report from the CI-running report plus database
    query results, then publish it to S3 (unless --no-upload)."""
    args = parse_args()

    # Connection details come from the environment (see the *_VAR constants).
    db_client = Client(
        host=os.getenv(DATABASE_HOST_VAR),
        user=os.getenv(DATABASE_USER_VAR),
        password=os.getenv(DATABASE_PASSWORD_VAR),
        port=9440,
        secure="y",
        verify=False,  # NOTE(review): TLS cert verification disabled — confirm intended
        settings={"use_numpy": True},  # needed for query_dataframe results
    )

    s3_path = (
        f"https://s3.amazonaws.com/{S3_BUCKET}/{args.pr_number}/{args.commit_sha}/"
    )
    report_destination_url = s3_path + "combined_report.html"
    ci_running_report_url = s3_path + "ci_running.html"

    # The combined report is produced by rewriting the already-uploaded
    # "CI running" report, so that page must exist before we proceed.
    response = requests.get(ci_running_report_url)
    if response.status_code == 200:
        ci_running_report: str = response.text
    else:
        print(
            f"Failed to download CI running report. Status code: {response.status_code}, Response: {response.text}"
        )
        exit(1)

    # checks_known_fails starts as an empty list and is replaced with a
    # DataFrame below only when a non-empty known-fails file is supplied.
    fail_results = {
        "checks_fails": get_checks_fails(db_client, args.actions_run_url),
        "checks_known_fails": [],
        "checks_errors": get_checks_errors(db_client, args.actions_run_url),
        "regression_fails": get_regression_fails(db_client, args.actions_run_url),
    }

    if args.known_fails:
        if not os.path.exists(args.known_fails):
            print(f"Known fails file {args.known_fails} not found.")
            exit(1)

        with open(args.known_fails) as f:
            known_fails = json.load(f)

        if known_fails:
            fail_results["checks_known_fails"] = get_checks_known_fails(
                db_client, args.actions_run_url, known_fails
            )

    # Splice the new sections into the CI running report:
    # - the first <table> gets a table of contents and a section header
    #   prepended (plus a preview banner when --mark-preview is set);
    # - the first </table> gets the four results sections appended.
    combined_report = (
        ci_running_report.replace("ClickHouse CI Running for", "Combined CI Report for")
        .replace(
            "<table>",
            f"""<h2>Table of Contents</h2>
{'<p style="font-weight: bold;color: #F00;">This is a preview. FinishCheck has not completed.</p>' if args.mark_preview else ""}
<ul>
<li><a href="#ci-jobs-status">CI Jobs Status</a></li>
<li><a href="#checks-errors">Checks Errors</a> ({len(fail_results['checks_errors'])})</li>
<li><a href="#checks-fails">Checks New Fails</a> ({len(fail_results['checks_fails'])})</li>
<li><a href="#regression-fails">Regression New Fails</a> ({len(fail_results['regression_fails'])})</li>
<li><a href="#checks-known-fails">Checks Known Fails</a> ({len(fail_results['checks_known_fails'])})</li>
</ul>

<h2 id="ci-jobs-status">CI Jobs Status</h2>
<table>""",
            1,
        )
        .replace(
            "</table>",
            f"""</table>

<h2 id="checks-errors">Checks Errors</h2>
{format_results_as_html_table(fail_results['checks_errors'])}

<h2 id="checks-fails">Checks New Fails</h2>
{format_results_as_html_table(fail_results['checks_fails'])}

<h2 id="regression-fails">Regression New Fails</h2>
{format_results_as_html_table(fail_results['regression_fails'])}

<h2 id="checks-known-fails">Checks Known Fails</h2>
{format_results_as_html_table(fail_results['checks_known_fails'])}
""",
            1,
        )
    )
    report_path = Path("combined_report.html")
    report_path.write_text(combined_report, encoding="utf-8")

    if args.no_upload:
        print(f"Report saved to {report_path}")
        exit(0)

    # Upload the report to S3
    s3_client = boto3.client("s3")

    try:
        s3_client.put_object(
            Bucket=S3_BUCKET,
            Key=f"{args.pr_number}/{args.commit_sha}/combined_report.html",
            Body=combined_report,
            ContentType="text/html; charset=utf-8",
        )
    except NoCredentialsError:
        # NOTE(review): upload failure is only logged; the destination URL is
        # still printed below — confirm this best-effort behavior is intended.
        print("Credentials not available for S3 upload.")

    print(report_destination_url)
294+
295+
296+
# Script entry point.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)