|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import json |
| 4 | +import re |
| 5 | +from datetime import datetime, timedelta |
| 6 | +from typing import List, Optional |
| 7 | + |
| 8 | +from ci.praktika.dataclasses import TestCaseIssue, TestCaseIssueCatalog |
| 9 | +from ci.praktika.result import Result |
| 10 | +from ci.praktika.s3 import S3 |
| 11 | +from ci.praktika.utils import Shell, Utils |
| 12 | +from ci.settings.settings import S3_REPORT_BUCKET_NAME |
| 13 | + |
| 14 | + |
def extract_test_name(title: str, body: str) -> Optional[str]:
    """
    Extract a test name from an issue title or body.

    Two naming conventions are recognized, in priority order:
      1. a 5-digit number followed by an underscore (e.g. "01234_foo")
      2. "test_" followed by word characters (e.g. "test_backup_restore")

    Args:
        title: Issue title.
        body: Issue body (may be empty).

    Returns:
        The extracted test name with trailing quotes/backticks/punctuation
        stripped, or None if neither pattern matches.
    """
    # Combine title and body for searching; the title is searched first
    # simply because it comes first in the combined text.
    text = f"{title}\n{body}"

    # Patterns in priority order; the first match across the combined text
    # wins.  Trailing punctuation is stripped because test names are often
    # quoted or end a sentence in issue text.
    for pattern in (r"\b(\d{5}_\S+)", r"\b(test_\w+)"):
        match = re.search(pattern, text)
        if match:
            return match.group(1).rstrip("`'\",.;:!?)")

    return None
| 40 | + |
| 41 | + |
def fetch_github_issues(
    label: str, state: str = "open", days_back: Optional[int] = None
) -> List[dict]:
    """
    Fetch issues from GitHub using the gh CLI in a single request.

    NOTE: there is no pagination — the CLI is invoked once with a hard
    limit; if exactly `limit_per_request` issues come back, a warning is
    printed because more issues may exist that were not fetched.

    Args:
        label: GitHub label to filter by
        state: Issue state (open or closed)
        days_back: For closed issues, only fetch those closed within this many days (default: None for all)

    Returns:
        List of issue dictionaries (empty on error or when nothing matches)
    """
    all_issues = []
    limit_per_request = 1000  # Maximum we'll fetch per request

    # Closed issues can be narrowed by close date only via the search
    # syntax; everything else uses a plain label/state listing.
    if state == "closed" and days_back:
        date_threshold = (datetime.now() - timedelta(days=days_back)).strftime(
            "%Y-%m-%d"
        )
        search_query = f'label:"{label}" is:closed closed:>{date_threshold}'
        base_cmd = f"gh issue list --search '{search_query}' --json number,title,body,closedAt --limit {limit_per_request}"
        print(
            f"Fetching {state} issues with label '{label}' closed in last {days_back} days (since {date_threshold})..."
        )
    else:
        base_cmd = f'gh issue list --label "{label}" --state {state} --json number,title,body,closedAt --limit {limit_per_request}'
        print(f"Fetching {state} issues with label '{label}'...")

    try:
        output = Shell.get_output(base_cmd, verbose=True)

        # Empty stdout means gh found nothing (or printed nothing usable).
        if not output or not output.strip():
            print(f" No issues found for label '{label}' with state '{state}'")
            return []

        issues = json.loads(output)
        if not issues:
            print(f" No issues found for label '{label}' with state '{state}'")
            return []

        all_issues.extend(issues)
        print(f" Found {len(all_issues)} issues")

        # A full page means the hard limit was hit and results may be
        # truncated — surface that to the operator.
        if len(issues) == limit_per_request:
            print(
                f" WARNING: Reached limit of {limit_per_request} issues. There may be more issues not fetched."
            )

        return all_issues
    except json.JSONDecodeError as e:
        print(f"ERROR: Failed to parse JSON response for label '{label}': {e}")
        return []
    except Exception as e:
        # Best-effort: any CLI/network failure degrades to an empty result
        # rather than failing the whole job.
        print(f"ERROR: Failed to fetch issues with label '{label}': {e}")
        return []
| 101 | + |
| 102 | + |
def process_issues(issues: List[dict], is_closed: bool = False) -> List[TestCaseIssue]:
    """
    Process raw GitHub issues into TestCaseIssue objects.

    Args:
        issues: List of raw issue dictionaries from GitHub
        is_closed: Whether these are closed issues (currently unused; kept
            for interface stability — closed state is conveyed via closedAt)

    Returns:
        List of TestCaseIssue objects
    """
    test_case_issues = []

    for issue in issues:
        # Default to 0 rather than "": int("") would raise ValueError below
        # for a malformed record that lacks a number.
        number = issue.get("number", 0)
        title = issue.get("title", "")
        body = issue.get("body", "")
        closed_at = issue.get("closedAt", "")

        # Extract test name from title or body
        test_name = extract_test_name(title, body)

        if not test_name:
            print(
                f" Warning: Could not extract test name from issue #{number}: {title}"
            )
            test_name = "unknown"

        # Construct GitHub issue URL
        issue_url = f"https://github.com/ClickHouse/ClickHouse/issues/{number}"

        test_case_issue = TestCaseIssue(
            test_name=test_name,
            closed_at=closed_at if closed_at else "",
            issue=int(number),
            issue_url=issue_url,
            title=title,
            body=body if body else "",
        )
        test_case_issues.append(test_case_issue)

    return test_case_issues
| 145 | + |
| 146 | + |
def fetch_flaky_test_catalog() -> TestCaseIssueCatalog:
    """
    Fetch and organize all flaky test issues from GitHub.

    Returns:
        TestCaseIssueCatalog with active and resolved issues
    """
    catalog = TestCaseIssueCatalog()

    # Open issues labelled "flaky test" form the active set.
    print("\n--- Fetching active flaky test issues ---")
    catalog.active_test_issues = process_issues(
        fetch_github_issues(label="flaky test", state="open"),
        is_closed=False,
    )
    print(f"Processed {len(catalog.active_test_issues)} active issues\n")

    # Issues closed within the last 30 days form the resolved set.
    print("--- Fetching resolved flaky test issues ---")
    catalog.resolved_test_issues = process_issues(
        fetch_github_issues(label="flaky test", state="closed", days_back=30),
        is_closed=True,
    )
    print(f"Processed {len(catalog.resolved_test_issues)} resolved issues\n")

    return catalog
| 171 | + |
| 172 | + |
if __name__ == "__main__":
    results = []
    catalog = None
    link = None

    def _fetch_and_dump():
        # Build the catalog from GitHub and persist it locally.
        global catalog
        catalog = fetch_flaky_test_catalog()
        catalog.dump()
        return True

    results.append(
        Result.from_commands_run(
            name="Fetch flaky test issues", command=_fetch_and_dump
        )
    )

    if results[-1].is_ok():
        # Print summary
        print("\n=== Flaky Test Issues Summary ===")
        print(f"Active issues: {len(catalog.active_test_issues)}")
        print(f"Resolved issues: {len(catalog.resolved_test_issues)}")

        # Show up to five examples from each bucket.
        active_sample = catalog.active_test_issues[:5]
        if active_sample:
            print("\n--- Sample Active Issues ---")
            for entry in active_sample:
                print(f" Issue #{entry.issue}: {entry.test_name} - {entry.title}")

        resolved_sample = catalog.resolved_test_issues[:5]
        if resolved_sample:
            print("\n--- Sample Resolved Issues ---")
            for entry in resolved_sample:
                print(
                    f" Issue #{entry.issue}: {entry.test_name} - {entry.title} (closed: {entry.closed_at})"
                )

        def _upload_catalog():
            # Compress the dumped catalog and push it to the statistics
            # prefix of the report bucket; the S3 link is reported below.
            global link
            gz_path = Utils.compress_gz(
                catalog.file_name_static("flaky_test_catalog")
            )
            link = S3.copy_file_to_s3(
                local_path=gz_path,
                s3_path=f"{S3_REPORT_BUCKET_NAME}/statistics",
                content_type="application/json",
                content_encoding="gzip",
            )
            return True

        results.append(
            Result.from_commands_run(
                name="Upload flaky test catalog", command=_upload_catalog
            )
        )

    # Complete the job
    Result.create_from(results=results, links=[link]).complete_job()
0 commit comments