Skip to content

Commit 93fa266

Browse files
committed
Merge remote-tracking branch 'origin/master' into pqf
2 parents bfb7f40 + db4efe2 commit 93fa266

File tree

64 files changed

+1948
-194
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+1948
-194
lines changed

.github/workflows/hourly.yml

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# generated by praktika
# NOTE(review): despite the workflow name "Hourly", the cron below fires every
# 3 hours on weekdays (Mon-Fri) — confirm this is the intended schedule.

name: Hourly
on:
  schedule:
    - cron: 0 */3 * * 1-5
  workflow_dispatch:

# At most one run of this workflow at a time.
concurrency:
  group: ${{ github.workflow }}

env:
  PYTHONUNBUFFERED: 1
  CHECKOUT_REF: ""

jobs:

  config_workflow:
    runs-on: [self-hosted, style-checker-aarch64]
    needs: []
    name: "Config Workflow"
    outputs:
      data: ${{ steps.run.outputs.DATA }}
      pipeline_status: ${{ steps.run.outputs.pipeline_status }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ env.CHECKOUT_REF }}

      # NOTE(review): the generator template repeats "./ci/tmp" three times in
      # the rm/mkdir lines — harmless but redundant; fix belongs in praktika.
      - name: Prepare env script
        run: |
          rm -rf ./ci/tmp ./ci/tmp ./ci/tmp
          mkdir -p ./ci/tmp ./ci/tmp ./ci/tmp
          cat > ./ci/tmp/praktika_setup_env.sh << 'ENV_SETUP_SCRIPT_EOF'
          export PYTHONPATH=./ci:.:

          cat > ./ci/tmp/workflow_status.json << 'EOF'
          ${{ toJson(needs) }}
          EOF
          ENV_SETUP_SCRIPT_EOF

      - name: Run
        id: run
        run: |
          echo "pipeline_status=undefined" >> $GITHUB_OUTPUT
          . ./ci/tmp/praktika_setup_env.sh
          set -o pipefail
          if command -v ts &> /dev/null; then
            python3 -m praktika run 'Config Workflow' --workflow "Hourly" --ci |& ts '[%Y-%m-%d %H:%M:%S]' | tee ./ci/tmp/job.log
          else
            python3 -m praktika run 'Config Workflow' --workflow "Hourly" --ci |& tee ./ci/tmp/job.log
          fi

  collect_flaky_tests:
    runs-on: [self-hosted, style-checker-aarch64]
    needs: [config_workflow]
    name: "Collect flaky tests"
    outputs:
      data: ${{ steps.run.outputs.DATA }}
      pipeline_status: ${{ steps.run.outputs.pipeline_status }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ env.CHECKOUT_REF }}

      - name: Prepare env script
        run: |
          rm -rf ./ci/tmp ./ci/tmp ./ci/tmp
          mkdir -p ./ci/tmp ./ci/tmp ./ci/tmp
          cat > ./ci/tmp/praktika_setup_env.sh << 'ENV_SETUP_SCRIPT_EOF'
          export PYTHONPATH=./ci:.:

          cat > ./ci/tmp/workflow_status.json << 'EOF'
          ${{ toJson(needs) }}
          EOF
          ENV_SETUP_SCRIPT_EOF

      - name: Run
        id: run
        run: |
          echo "pipeline_status=undefined" >> $GITHUB_OUTPUT
          . ./ci/tmp/praktika_setup_env.sh
          set -o pipefail
          if command -v ts &> /dev/null; then
            python3 -m praktika run 'Collect flaky tests' --workflow "Hourly" --ci |& ts '[%Y-%m-%d %H:%M:%S]' | tee ./ci/tmp/job.log
          else
            python3 -m praktika run 'Collect flaky tests' --workflow "Hourly" --ci |& tee ./ci/tmp/job.log
          fi

ci/jobs/build_clickhouse.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,10 @@ def main():
129129
cmake_cmd += " -DCLICKHOUSE_OFFICIAL_BUILD=1"
130130

131131
# For PRs we prefer to build without debug symbols to save space and time (LTO is much faster)
132-
if info.pr_number != 0 and build_type in (BuildTypes.AMD_RELEASE, BuildTypes.ARM_RELEASE):
132+
if info.pr_number != 0 and build_type in (
133+
BuildTypes.AMD_RELEASE,
134+
BuildTypes.ARM_RELEASE,
135+
):
133136
cmake_cmd += " -DDISABLE_ALL_DEBUG_SYMBOLS=1"
134137

135138
cmake_cmd += f" {current_directory}"
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import re
5+
from datetime import datetime, timedelta
6+
from typing import List, Optional
7+
8+
from ci.praktika.dataclasses import TestCaseIssue, TestCaseIssueCatalog
9+
from ci.praktika.result import Result
10+
from ci.praktika.s3 import S3
11+
from ci.praktika.utils import Shell, Utils
12+
from ci.settings.settings import S3_REPORT_BUCKET_NAME
13+
14+
15+
def extract_test_name(title: str, body: str) -> Optional[str]:
    """
    Extract a test name from an issue title or body.

    The title and body are combined and searched with two patterns, in order:
      1. a 5-digit number followed by an underscore (stateless test names,
         e.g. "02456_some_test")
      2. an identifier starting with "test_" (integration test names)

    Trailing quotes, backticks, and punctuation picked up by the greedy
    character classes are stripped, so names quoted in prose come out clean.

    Returns:
        The extracted test name, or None if neither pattern matches.
    """
    text = f"{title}\n{body}"

    # Order matters: the 5-digit stateless-test pattern takes priority over
    # the generic "test_*" pattern, matching the original lookup semantics.
    for pattern in (r"\b(\d{5}_\S+)", r"\b(test_\w+)"):
        match = re.search(pattern, text)
        if match:
            # Strip trailing quotes, backticks, and other punctuation.
            return match.group(1).rstrip("`'\",.;:!?)")

    return None
40+
41+
42+
def fetch_github_issues(
    label: str, state: str = "open", days_back: Optional[int] = None
) -> List[dict]:
    """
    Fetch issues from GitHub using the ``gh`` CLI (single request, up to 1000).

    Args:
        label: GitHub label to filter by
        state: Issue state ("open" or "closed")
        days_back: For closed issues, only fetch those closed within this many
            days (default: None for all)

    Returns:
        List of issue dictionaries (keys: number, title, body, closedAt);
        empty list on error or when nothing matches.
    """
    limit_per_request = 1000  # Maximum we'll fetch per request

    # Build the gh command.  Filtering by close date requires the search
    # syntax (closed:>DATE); otherwise a plain label/state listing suffices.
    if state == "closed" and days_back:
        date_threshold = (datetime.now() - timedelta(days=days_back)).strftime(
            "%Y-%m-%d"
        )
        search_query = f'label:"{label}" is:closed closed:>{date_threshold}'
        base_cmd = f"gh issue list --search '{search_query}' --json number,title,body,closedAt --limit {limit_per_request}"
        print(
            f"Fetching {state} issues with label '{label}' closed in last {days_back} days (since {date_threshold})..."
        )
    else:
        base_cmd = f'gh issue list --label "{label}" --state {state} --json number,title,body,closedAt --limit {limit_per_request}'
        print(f"Fetching {state} issues with label '{label}'...")

    try:
        output = Shell.get_output(base_cmd, verbose=True)

        if not output or not output.strip():
            print(f" No issues found for label '{label}' with state '{state}'")
            return []

        issues = json.loads(output)
        if not issues:
            print(f" No issues found for label '{label}' with state '{state}'")
            return []

        print(f" Found {len(issues)} issues")

        # A completely full page means the single request may have truncated
        # the result set — make that visible in the job log.
        if len(issues) == limit_per_request:
            print(
                f" WARNING: Reached limit of {limit_per_request} issues. There may be more issues not fetched."
            )

        return issues
    except json.JSONDecodeError as e:
        print(f"ERROR: Failed to parse JSON response for label '{label}': {e}")
        return []
    except Exception as e:
        print(f"ERROR: Failed to fetch issues with label '{label}': {e}")
        return []
101+
102+
103+
def process_issues(issues: List[dict], is_closed: bool = False) -> List[TestCaseIssue]:
    """
    Process raw GitHub issues into TestCaseIssue objects.

    Args:
        issues: List of raw issue dictionaries from GitHub
        is_closed: Whether these are closed issues (currently unused — the
            closed state is conveyed via the ``closedAt`` field; kept for
            interface compatibility)

    Returns:
        List of TestCaseIssue objects
    """
    test_case_issues = []

    for issue in issues:
        # Default to 0 rather than "" so a malformed entry without a "number"
        # degrades to a bogus record instead of int("") raising ValueError.
        number = issue.get("number", 0)
        title = issue.get("title", "")
        body = issue.get("body", "")
        closed_at = issue.get("closedAt", "")

        # Extract test name from title or body
        test_name = extract_test_name(title, body)

        if not test_name:
            print(
                f" Warning: Could not extract test name from issue #{number}: {title}"
            )
            test_name = "unknown"

        # Construct GitHub issue URL
        issue_url = f"https://github.com/ClickHouse/ClickHouse/issues/{number}"

        test_case_issue = TestCaseIssue(
            test_name=test_name,
            closed_at=closed_at if closed_at else "",
            issue=int(number),
            issue_url=issue_url,
            title=title,
            body=body if body else "",
        )
        test_case_issues.append(test_case_issue)

    return test_case_issues
145+
146+
147+
def fetch_flaky_test_catalog() -> TestCaseIssueCatalog:
    """
    Fetch and organize all flaky test issues from GitHub.

    Active issues are all open issues labelled "flaky test"; resolved issues
    are issues with the same label that were closed in the last 30 days.

    Returns:
        TestCaseIssueCatalog with active and resolved issues populated.
    """
    catalog = TestCaseIssueCatalog()

    # Open issues labelled "flaky test" become the active set.
    print("\n--- Fetching active flaky test issues ---")
    active_raw = fetch_github_issues(label="flaky test", state="open")
    catalog.active_test_issues = process_issues(active_raw, is_closed=False)
    print(f"Processed {len(catalog.active_test_issues)} active issues\n")

    # Recently-closed issues with the same label become the resolved set.
    print("--- Fetching resolved flaky test issues ---")
    resolved_raw = fetch_github_issues(label="flaky test", state="closed", days_back=30)
    catalog.resolved_test_issues = process_issues(resolved_raw, is_closed=True)
    print(f"Processed {len(catalog.resolved_test_issues)} resolved issues\n")

    return catalog
171+
172+
173+
if __name__ == "__main__":
    # Job entry point: fetch the flaky-test catalog from GitHub, print a
    # human-readable summary, upload the compressed catalog to S3, and
    # complete the praktika job from the collected step results.
    results = []
    catalog = None

    def fetch_catalog():
        # Runs as a praktika sub-step; stores the catalog in the module-level
        # `catalog` so the summary/upload code below can reach it.
        global catalog
        catalog = fetch_flaky_test_catalog()
        catalog.dump()
        return True

    results.append(
        Result.from_commands_run(name="Fetch flaky test issues", command=fetch_catalog)
    )

    if results[-1].is_ok():
        # Print summary
        print("\n=== Flaky Test Issues Summary ===")
        print(f"Active issues: {len(catalog.active_test_issues)}")
        print(f"Resolved issues: {len(catalog.resolved_test_issues)}")

        # Print sample of active issues
        if catalog.active_test_issues:
            print("\n--- Sample Active Issues ---")
            for issue in catalog.active_test_issues[:5]:
                print(f" Issue #{issue.issue}: {issue.test_name} - {issue.title}")

        # Print sample of resolved issues
        if catalog.resolved_test_issues:
            print("\n--- Sample Resolved Issues ---")
            for issue in catalog.resolved_test_issues[:5]:
                print(
                    f" Issue #{issue.issue}: {issue.test_name} - {issue.title} (closed: {issue.closed_at})"
                )

    link = None
    if results[-1].is_ok():

        def upload():
            # Compress the dumped catalog and copy it to the statistics prefix
            # of the report bucket; the resulting URL is exposed via the
            # module-level `link`.
            local_name = catalog.file_name_static("flaky_test_catalog")
            compressed_name = Utils.compress_gz(local_name)
            global link
            link = S3.copy_file_to_s3(
                local_path=compressed_name,
                s3_path=f"{S3_REPORT_BUCKET_NAME}/statistics",
                content_type="application/json",
                content_encoding="gzip",
            )
            return True

        results.append(
            Result.from_commands_run(name="Upload flaky test catalog", command=upload)
        )

    # Complete the job
    # NOTE(review): if the fetch step failed, `link` stays None and
    # links=[None] is passed below — confirm Result.create_from tolerates that.
    Result.create_from(results=results, links=[link]).complete_job()

ci/jobs/scripts/clickhouse_proc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
import sys
55
import time
66
import traceback
7+
import uuid
78
from collections import defaultdict
89
from pathlib import Path
9-
import uuid
1010

1111
from ci.praktika import Secret
1212
from ci.praktika.info import Info

ci/praktika/dataclasses.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from dataclasses import dataclass, field
2+
from typing import List
3+
4+
from .settings import Settings
5+
from .utils import MetaClasses, Utils
6+
7+
8+
@dataclass
class TestCaseIssue:
    """Represents a single flaky test issue from GitHub"""

    # Test name extracted from the issue text (e.g. "02456_some_test" or
    # "test_storage_s3"); producers fall back to "unknown" when extraction
    # fails.
    test_name: str
    # closedAt value as returned by the gh CLI; "" for open issues.
    closed_at: str
    # GitHub issue number.
    issue: int
    # Full URL of the issue on GitHub.
    issue_url: str
    # Issue title.
    title: str
    # Issue body text; "" when the issue has no body.
    body: str
19+
20+
@dataclass
class TestCaseIssueCatalog(MetaClasses.Serializable):
    """Catalog of all flaky test issues, both active and resolved"""

    name: str = "flaky_test_catalog"
    active_test_issues: List[TestCaseIssue] = field(default_factory=list)
    resolved_test_issues: List[TestCaseIssue] = field(default_factory=list)

    @classmethod
    def file_name_static(cls, name):
        # Serialized catalogs live in the praktika temp dir as JSON files.
        return f"{Settings.TEMP_DIR}/{Utils.normalize_string(name)}.json"

    @classmethod
    def from_dict(cls, obj: dict):
        """Custom deserialization to handle nested TestCaseIssue objects"""

        def to_issues(raw):
            # Accept both plain dicts (fresh JSON) and already-built objects.
            return [
                TestCaseIssue(**entry) if isinstance(entry, dict) else entry
                for entry in raw
            ]

        return cls(
            name=obj.get("name", "flaky_test_catalog"),
            active_test_issues=to_issues(obj.get("active_test_issues", [])),
            resolved_test_issues=to_issues(obj.get("resolved_test_issues", [])),
        )

0 commit comments

Comments
 (0)