Add Triton Backend #35
Changes from all commits: 67e0c15, 8bfdd21, 32ff679, eb4e8aa, e9bf734, 26d4cc0, e5ba9b3, 01e0c0e, 31116dd, c60b881, 37dd8db, f0b88d7, 6bbcf97, e2bc271, a5f63af, dc87a37
One file in the diff was deleted.

Requirements file — @@ -1,17 +1,28 @@

The previously flat dependency list is regrouped under comment headers; the resulting file reads:

# Frameworks
torch==2.5.0
# we shall upgrade torch for blackwell when it is stable
transformers
datasets
modal

# DSLs
nvidia-cutlass-dsl

# helper
tqdm
packaging
pydra_config
pytest
ninja
archon-ai

# Numerics
einops
dotenv
numpy

# to deprecate with litellm
google-generativeai
together
openai
anthropic
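A side note on dependencies: `triton` itself is not pinned here; on Linux it ships with the `torch==2.5.0` wheels. A quick, illustrative check (an assumption about the install environment, not part of this PR):

```python
# Illustrative sanity check (assumes Linux wheels, where triton is bundled
# with torch): confirm the pinned torch and its bundled triton import cleanly.
import torch
import triton

print(torch.__version__)   # expected to start with "2.5"
print(triton.__version__)  # triton version bundled with the torch 2.5 wheel
```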
Next file: the script that generates and evaluates a single sample.
@@ -3,13 +3,21 @@

 import os, sys
 import torch
 import json
 import modal

 from datasets import load_dataset

 from src.dataset import construct_kernelbench_dataset
 from src.eval import eval_kernel_against_ref
 from src.prompt_constructor import prompt_generate_custom_cuda_from_prompt_template
-from src.utils import extract_first_code, query_server, set_gpu_arch, read_file, create_inference_server_from_presets
+from src.prompt_constructor_multilang import get_prompt_for_backend
+from src.utils import (
+    create_inference_server_from_presets,
+    extract_first_code,
+    query_server,
+    read_file,
+    set_gpu_arch,
+)

 """
 Generate and evaluate a single sample
@@ -20,15 +28,15 @@

 torch.set_printoptions(precision=4, threshold=10)


 class EvalConfig(Config):
     def __init__(self):
-        self.dataset_src = REQUIRED # either huggingface or local
+        self.dataset_src = REQUIRED  # either huggingface or local

         # name of dataset name on Hugging Face
         self.dataset_name = "ScalingIntelligence/KernelBench"

         # Problem Specification
         self.level = REQUIRED
         # NOTE: this is the logical index (problem id the problem_name)\
@@ -56,6 +64,8 @@ def __init__(self):

         self.log_generated_kernel = False
         self.log_eval_result = False

+        self.backend = "cuda"
+
     def verbose_logging(self):
         self.log = True
         self.log_prompt = True
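With pydra_config, this default can presumably be overridden from the command line (e.g. `backend=triton`). Purely as an illustration of the intended values, a fail-fast check mirroring the error message added in `main()` below could look like this (hypothetical helper, not part of this PR):

```python
# Hypothetical helper (not in this PR): fail fast on an unsupported backend
# instead of erroring later during prompt construction in main().
SUPPORTED_BACKENDS = ("cuda", "triton", "cute")

def validate_backend(backend: str) -> str:
    if backend not in SUPPORTED_BACKENDS:
        raise ValueError(
            f"Unsupported backend: {backend}. Must be 'cuda', 'triton', or 'cute'."
        )
    return backend
```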
@@ -86,24 +96,31 @@ def main(config: EvalConfig):

     if config.log:
         os.makedirs(config.logdir, exist_ok=True)

     # Problem Checks
     num_problems = len(curr_level_dataset)
     print(f"Number of problems in Level {config.level}: {num_problems}")
-    print(f"Start Generation + Evaluation for Level {config.level} Problem {config.problem_id}")
-    assert config.problem_id <= num_problems, f"Problem ID {config.problem_id} out of range for Level {config.level}"
+    print(
+        f"Start Generation + Evaluation for Level {config.level} Problem {config.problem_id}"
+    )
+
+    assert (
+        config.problem_id <= num_problems
+    ), f"Problem ID {config.problem_id} out of range for Level {config.level}"

     # 1. Fetch Problem
     if config.dataset_src == "huggingface":
-        curr_problem_row = curr_level_dataset.filter(lambda x: x["problem_id"] == config.problem_id)
+        curr_problem_row = curr_level_dataset.filter(
+            lambda x: x["problem_id"] == config.problem_id
+        )
         ref_arch_src = curr_problem_row["code"][0]
         problem_name = curr_problem_row["name"][0]

     elif config.dataset_src == "local":
-        problem_idx_in_dataset = config.problem_id - 1 # due to dataset list being 0-indexed locally
+        problem_idx_in_dataset = (
+            config.problem_id - 1
+        )  # due to dataset list being 0-indexed locally
         ref_arch_path = curr_level_dataset[problem_idx_in_dataset]

         problem_name = os.path.basename(ref_arch_path)

Collaborator (on the 0-indexing workaround above): @pythonomar22 this is something we will get rid of with your new benchmark data class so we don't have to deal with all these nasty off-by-one issues.
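The "benchmark data class" the reviewer mentions could fold this 1-indexed/0-indexed bookkeeping into one place. A rough sketch, with hypothetical names not taken from this PR:

```python
# Hypothetical sketch of a benchmark data class (names are illustrative):
# resolve 1-indexed problem IDs in one place so callers never subtract 1.
from dataclasses import dataclass
from typing import List


@dataclass
class LevelDataset:
    level: int
    problem_paths: List[str]  # stored 0-indexed, like the local dataset list

    def get_by_problem_id(self, problem_id: int) -> str:
        # problem_id is the 1-indexed logical ID used throughout the scripts
        if not 1 <= problem_id <= len(self.problem_paths):
            raise IndexError(
                f"Problem ID {problem_id} out of range for Level {self.level}"
            )
        return self.problem_paths[problem_id - 1]
```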
@@ -112,52 +129,90 @@ def main(config: EvalConfig):

     # Extract problem number from problem name (e.g. "1" from "1_Square_matrix_multiplication_.py")
     problem_number = int(problem_name.split("_")[0])
-    assert problem_number == config.problem_id, f"Problem number in filename ({problem_number}) does not match config problem_id ({config.problem_id})"
+    assert (
+        problem_number == config.problem_id
+    ), f"Problem number in filename ({problem_number}) does not match config problem_id ({config.problem_id})"

     # 2. Generate Sample
     # Create inference function with config parameters
     # We provide some presets in utils but you can also pass in your own, see query_server for more details
-    inference_server = create_inference_server_from_presets(server_type=config.server_type,
-                                                            model_name=config.model_name,
-                                                            temperature=config.temperature,
-                                                            max_tokens=config.max_tokens,
-                                                            verbose=config.verbose,
-                                                            time_generation=True)
+    inference_server = create_inference_server_from_presets(
+        server_type=config.server_type,
+        model_name=config.model_name,
+        temperature=config.temperature,
+        max_tokens=config.max_tokens,
+        verbose=config.verbose,
+        time_generation=True,
+    )

+    # Use appropriate prompt constructor based on backend
+    if config.backend == "cuda":
+        custom_prompt = prompt_generate_custom_cuda_from_prompt_template(ref_arch_src)
+    elif config.backend in ["triton", "cute"]:  # removed "tilelang"
+        custom_prompt = get_prompt_for_backend(ref_arch_src, config.backend)
+    else:
+        raise ValueError(
+            f"Unsupported backend: {config.backend}. Must be 'cuda', 'triton', or 'cute'."
+        )

Collaborator (on the cuda branch): @AffectionateCurry I see what you mean here now. We can refactor this later with a better prompt template!

Collaborator (on the error message): nice catch here, we shall update the README in the pre-GPU-mode hackathon to list these as the available options.
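The "better prompt template" refactor mentioned above could collapse the if/elif chain into a single lookup. A hypothetical sketch — only `prompt_generate_custom_cuda_from_prompt_template` and `get_prompt_for_backend` come from this diff's imports; the registry itself is an assumption:

```python
# Hypothetical refactor sketch (not part of this PR): dispatch prompt
# construction through one mapping instead of an if/elif chain.
PROMPT_CONSTRUCTORS = {
    "cuda": prompt_generate_custom_cuda_from_prompt_template,
    "triton": lambda src: get_prompt_for_backend(src, "triton"),
    "cute": lambda src: get_prompt_for_backend(src, "cute"),
}


def build_prompt(ref_arch_src: str, backend: str) -> str:
    try:
        return PROMPT_CONSTRUCTORS[backend](ref_arch_src)
    except KeyError:
        raise ValueError(
            f"Unsupported backend: {backend}. Must be one of {sorted(PROMPT_CONSTRUCTORS)}."
        ) from None
```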
-    custom_cuda_prompt = prompt_generate_custom_cuda_from_prompt_template(ref_arch_src)
     if config.log_prompt:
-        with open(os.path.join(config.logdir, f"prompt_level_{config.level}_problem_{config.problem_id}.txt"), "w") as f:
-            f.write(custom_cuda_prompt)
+        with open(
+            os.path.join(
+                config.logdir,
+                f"prompt_level_{config.level}_problem_{config.problem_id}.txt",
+            ),
+            "w",
+        ) as f:
+            f.write(custom_prompt)

     # Query server with constructed prompt
-    custom_cuda = inference_server(custom_cuda_prompt)
-    custom_cuda = extract_first_code(custom_cuda, ["python", "cpp"])
-    # check LLM is able to generate custom CUDA code
-    assert custom_cuda is not None, "Custom CUDA code generation failed"
+    custom_kernel = inference_server(custom_prompt)
+    custom_kernel = extract_first_code(custom_kernel, ["python", "cpp"])
+
+    # check LLM is able to generate custom kernel code
+    assert (
+        custom_kernel is not None
+    ), f"Custom {config.backend} kernel code generation failed"

     # this should be optional
     if config.log:
-        with open(os.path.join(config.logdir, f"generated_kernel_level_{config.level}_problem_{config.problem_id}.py"), "w") as f:
-            f.write(custom_cuda)
+        with open(
+            os.path.join(
+                config.logdir,
+                f"generated_kernel_level_{config.level}_problem_{config.problem_id}.py",
+            ),
+            "w",
+        ) as f:
+            f.write(custom_kernel)
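For context, the `custom_kernel` string written out above is expected to be a complete Python module. For the new triton backend it might look roughly like this minimal sketch (illustrative only, following KernelBench's `ModelNew` convention; it is not output of this PR):

```python
# Illustrative example of a generated Triton "custom_kernel" module
# (ModelNew convention); not produced by or included in this PR.
import torch
import torch.nn as nn
import triton
import triton.language as tl


@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    # Each program instance handles one BLOCK_SIZE-wide slice of the tensors.
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)


class ModelNew(nn.Module):
    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        out = torch.empty_like(x)
        n_elements = out.numel()
        grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
        add_kernel[grid](x, y, out, n_elements, BLOCK_SIZE=1024)
        return out
```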
     # 3. Evaluate Kernel
     # NOTE: no need to wrap around process here as only a single sample
     # see batch eval for examples of process isolation
     kernel_exec_result = eval_kernel_against_ref(
-        ref_arch_src, custom_cuda, verbose=config.verbose, measure_performance=True, num_correct_trials=5, num_perf_trials=100
+        ref_arch_src,
+        custom_kernel,
+        verbose=config.verbose,
+        measure_performance=True,
+        num_correct_trials=5,
+        num_perf_trials=100,
+        backend=config.backend,
     )

Collaborator (on the new backend argument): clean, nice interface

-    print(f"Evaluation result for level {config.level} problem {config.problem_id}:\n{kernel_exec_result}")
+    print(
+        f"Evaluation result for level {config.level} problem {config.problem_id}:\n{kernel_exec_result}"
+    )

     if config.log:
-        with open(os.path.join(config.logdir, f"eval_result_level_{config.level}_problem_{config.problem_id}.txt"), "a") as f:
+        with open(
+            os.path.join(
+                config.logdir,
+                f"eval_result_level_{config.level}_problem_{config.problem_id}.txt",
+            ),
+            "a",
+        ) as f:
             f.write(f"Problem Name: {problem_name}\n")
             f.write(str(kernel_exec_result))


 if __name__ == "__main__":
     main()
Review comment: great check