feat(bindings/python)!: Generate stubs for Operator overloads and Scheme #6729
feat(bindings/python)!: Generate stubs for Operator overloads and Scheme #6729 · Xuanwo merged 16 commits into apache:main
Conversation
| A closed file cannot be used for further I/O operations. | ||
| """ | ||
| def __aenter__(self) -> typing.Self: ... | ||
| def __aenter__(self) -> typing_extensions.Self: ... |
There was a problem hiding this comment.
`typing.Self` is not available in Python 3.10, which opendal supports, so this now uses `typing_extensions.Self` instead.
| } | ||
|
|
||
| #[gen_stub(override_return_type(type_repr="typing.Self", imports=("typing")))] | ||
| #[gen_stub(override_return_type(type_repr="typing_extensions.Self", imports=("typing_extensions")))] |
| [group('build')] | ||
| stub-gen: setup | ||
| @echo "{{ BOLD }}--- Generating Python type stubs ---{{ NORMAL }}" | ||
| @cargo run --quiet --manifest-path=../../dev/Cargo.toml -- generate -l python |
There was a problem hiding this comment.
Overloads and Scheme generates as part of the stub-gen process
There was a problem hiding this comment.
This made me think that can we move stub_gen to odev instead.
There was a problem hiding this comment.
This made me think that can we move stub_gen to odev instead.
Actually, these are two separate processes that, from what I understand, cannot be merged into either the binding or odev.
- pyo3-stub-gen requires pyo3, which is available in the Python bindings. This step is responsible for generating most of the Python stubs from the Rust code.
- The generator in odev uses Jinja templating because it has the parsing logic for the services code, which is used for Java as well. If odev had a lib target, it could be used as a build dependency, but it would still be separate from the pyo3 step.
dev/src/generate/new_python.j2
Outdated
| @@ -0,0 +1,155 @@ | |||
| // Licensed to the Apache Software Foundation (ASF) under one | |||
There was a problem hiding this comment.
this file is temporary. will replace python.j2 with this.
done
| "etcd" | "foundationdb" | "ftp" | "hdfs" | "rocksdb" | "tikv" | "sftp" | "github" | ||
| | "cloudflare_kv" | "monoiofs" | "dbfs" | "surrealdb" | "d1" | "opfs" | "compfs" | ||
| | "lakefs" | "pcloud" | "vercel_blob" => false, |
There was a problem hiding this comment.
Will re-enable some of these in another PR.
| pub fn format_text(text: &str, indent: usize, max_line_length: usize) -> String { | ||
| // Precompute indentation string | ||
| let indent_str = " ".repeat(indent); |
There was a problem hiding this comment.
I was trying to avoid an external crate for this task.
| ConfigType::Bool => "builtins.bool", | ||
| ConfigType::Duration => "typing.Any", | ||
| ConfigType::I64 | ||
| | ConfigType::Usize | ||
| | ConfigType::U64 | ||
| | ConfigType::U32 | ||
| | ConfigType::U16 => "_int", | ||
| ConfigType::Vec => "_strings", | ||
| ConfigType::String => "str", | ||
| | ConfigType::U16 => "builtins.int", | ||
| ConfigType::Vec => "typing.Any", | ||
| ConfigType::String => "builtins.str", |
There was a problem hiding this comment.
should be able to cast automatically
| from opendal._opendal import ( # noqa: F403 | ||
| capability, | ||
| exceptions, | ||
| file, | ||
| layers, | ||
| services, | ||
| types, | ||
| ) | ||
| from opendal.operator import AsyncOperator, Operator # pyright:ignore | ||
|
|
||
| __version__: builtins.str | ||
|
|
||
| __all__ = _opendal.__all__ # noqa: F405 # pyright:ignore | ||
| __all__ += ["AsyncOperator", "Operator"] # pyright:ignore | ||
| __all__ = [ | ||
| "capability", | ||
| "exceptions", | ||
| "file", | ||
| "layers", | ||
| "services", | ||
| "types", | ||
| "AsyncOperator", | ||
| "Operator", |
There was a problem hiding this comment.
makes things explicit
dev/src/generate/new_python.j2
Outdated
| {% for srv in srvs %} | ||
| submit! { | ||
| gen_methods_from_python! { | ||
| r#" | ||
| import builtins | ||
| import typing | ||
| import typing_extensions | ||
| class Operator: | ||
| def __new__(cls, | ||
| scheme: typing.Literal["{{ snake_to_kebab_case(srv) }}"], | ||
| /, | ||
| {%- if srvs[srv].config %} |
There was a problem hiding this comment.
Operator overloads
dev/src/generate/new_python.j2
Outdated
| import builtins | ||
| import typing | ||
| import typing_extensions | ||
| class AsyncOperator: | ||
| def __new__(cls, | ||
| scheme: typing.Literal["{{ snake_to_kebab_case(srv) }}"], | ||
| /, | ||
| {%- if srvs[srv].config %} | ||
| *, |
There was a problem hiding this comment.
AsyncOperator overloads
| /// The new operator. | ||
| #[gen_stub(skip)] | ||
| #[new] |
Xuanwo
left a comment
There was a problem hiding this comment.
Thank you for working on this, mostly LGTM, just some questions.
| import typing | ||
|
|
||
| @typing.final | ||
| class Scheme(enum.Enum): |
There was a problem hiding this comment.
I have a goal to finally split OpenDAL into separate crates so users can easily implement and use their own services. On the Python side, I want to dynamically load them. Is there a way to enable this?
This is not a blocker: we can still merge this PR as is, but let’s keep this idea in mind as we move forward.
There was a problem hiding this comment.
separate crates how? by bindings or services ?
On the Python side, I want to dynamically load them. Is there a way to enable this?
In the Python world, yes, via dependency groups; in the Rust world, yes, by feature gating. But in the Rust-Python world via pyo3, I've never done this before, so I'm not sure. Then again, I had never done stub-gen before this either, so I will take a shot at it when that happens.
assuming a service level split:
This makes me think that if there were a way to generate language-agnostic linkable code for the services, the bindings could actually be generated via some sort of spec-driven development model. Elasticsearch and AWS do this, but they're server-based, so it's easier for them.
DuckDB does this as well (think of opendal services as DuckDB extensions: cross-platform and language-agnostic), but I need to think more on this. Is there an issue/discussion/RFC for this where I can contribute better?
There was a problem hiding this comment.
The idea is shown here: #5206
In short, the OpenDAL Rust core will eventually become opendal-service-s3, opendal-service-azblob, and more. I want to know if this change can benefit Python packages. Can we avoid delivering large wheels to users?
There was a problem hiding this comment.
raised astral-sh/uv#16465, which if answered will help.
maybe @messense has some suggestions from maturin side.
| [group('build')] | ||
| stub-gen: setup | ||
| @echo "{{ BOLD }}--- Generating Python type stubs ---{{ NORMAL }}" | ||
| @cargo run --quiet --manifest-path=../../dev/Cargo.toml -- generate -l python |
There was a problem hiding this comment.
This made me think that can we move stub_gen to odev instead.
| paths: [python] | ||
| options: | ||
| docstring_style: google | ||
| docstring_style: numpy |
There was a problem hiding this comment.
What's this change for? Do you prefer numpy style?
There was a problem hiding this comment.
Well, before all these PRs, the code had a mix of docstring conventions, which I wanted to standardize as well. While doing this (with the help of LLMs), all the generated docstrings turned out to be in the numpy convention.
Today I checked and found that mkdocs was not rendering them properly, so I made this change to get it working.
Numpy style is slightly verbose, but it's a very well-established standard as well.
|
postgresql test is stuck. |
|
@Xuanwo any other comments from your side before this can be merged? |
Xuanwo
left a comment
There was a problem hiding this comment.
#6729 (comment) may need a follow-up, but other changes LGTM, let's move!
Which issue does this PR close?
Closes #6686.
Rationale for this change
This uses an alternate approach to generate the overloads previously in `__base.pyi`.
Instead of rendering `__base.pyi` directly, this approach generates checked Rust code, which then gets submitted to `pyo3-stub-gen`, and the resultant stub `services.pyi` is safely generated with all other stubs at the same time.
I aim to make this the parent class of `AsyncOperator` and `Operator` so that they can get all the overloads.
What changes are included in this PR?
- `dev/src/generate/python.j2` #6686 along with rest of the stubs
- `__new__` in Operator for each service
- `__new__` in AsyncOperator for each service
- `__base.pyi`
- `services` py module containing a Scheme which can also be used in the constructors of operators
Are there any user-facing changes?
No breaking changes included.
Users get better docs and typing.