-
Notifications
You must be signed in to change notification settings - Fork 126
Description
This test failed!
To configure my behavior, see the Flaky Bot documentation.
If I'm commenting on this issue too often, add the flakybot: quiet label and
I will stop commenting.
commit: 6c6c303
buildURL: Build Status, Sponge
status: failed
Test output
self =
query = '\n SELECT\n SUM(bottles_sold) total_bottles,\n UPPER(category_name) category_name,\n ...om)\n GROUP BY category_name, magnitude, zip_code\n ORDER BY magnitude ASC, total_bottles DESC\n '
max_results = None, progress_bar_type = 'tqdm'
kwargs = {'configuration': {'jobTimeoutMs': 1, 'query': {'useQueryCache': False}}, 'dtypes': None}
TimeoutError =
RefreshError =
bigquery =
job_config = {'jobTimeoutMs': 1, 'query': {'useQueryCache': False}}
config = {'jobTimeoutMs': 1, 'query': {'useQueryCache': False}}
query_reply = QueryJob
def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
from concurrent.futures import TimeoutError
from google.auth.exceptions import RefreshError
from google.cloud import bigquery
job_config = {
"query": {
"useLegacySql": self.dialect
== "legacy"
# 'allowLargeResults', 'createDisposition',
# 'preserveNulls', destinationTable, useQueryCache
}
}
config = kwargs.get("configuration")
if config is not None:
job_config.update(config)
self._start_timer()
try:
logger.debug("Requesting query... ")
query_reply = self.client.query(
query,
job_config=bigquery.QueryJobConfig.from_api_repr(job_config),
location=self.location,
project=self.project_id,
)
logger.debug("Query running...")
except (RefreshError, ValueError):
if self.private_key:
raise AccessDenied("The service account credentials are not valid")
else:
raise AccessDenied(
"The credentials have been revoked or expired, "
"please re-run the application to re-authorize"
)
except self.http_error as ex:
self.process_http_error(ex)
job_id = query_reply.job_id
logger.debug("Job ID: %s" % job_id)
while query_reply.state != "DONE":
self.log_elapsed_seconds(" Elapsed", "s. Waiting...")
timeout_ms = job_config.get("jobTimeoutMs") or job_config["query"].get(
"timeoutMs"
)
timeout_ms = int(timeout_ms) if timeout_ms else None
if timeout_ms and timeout_ms < self.get_elapsed_seconds() * 1000:
raise QueryTimeout("Query timeout: {} ms".format(timeout_ms))
timeout_sec = 1.0
if timeout_ms:
# Wait at most 1 second so we can show progress bar
timeout_sec = min(1.0, timeout_ms / 1000.0)
try:
query_reply.result(timeout=timeout_sec)
except TimeoutError:
# Use our own timeout logic
pass
except self.http_error as ex:
self.process_http_error(ex)
if query_reply.cache_hit:
logger.debug("Query done.\nCache hit.\n")
else:
bytes_processed = query_reply.total_bytes_processed or 0
bytes_billed = query_reply.total_bytes_billed or 0
logger.debug(
"Query done.\nProcessed: {} Billed: {}".format(
self.sizeof_fmt(bytes_processed), self.sizeof_fmt(bytes_billed),
)
)
logger.debug(
"Standard price: ${:,.2f} USD\n".format(
bytes_billed * self.query_price_for_TB
)
)
dtypes = kwargs.get("dtypes")
# Ensure destination is populated.
try:
query_reply.result()
pandas_gbq/gbq.py:494:
self = QueryJob<project=precise-truck-742, location=US, id=62c91570-5bf8-4f84-8f69-68d3e740605f>
page_size = None, max_results = None
retry = <google.api_core.retry.Retry object at 0x7fbec2353580>, timeout = None
start_index = None
job_retry = <google.api_core.retry.Retry object at 0x7fbec23800d0>
def result( # type: ignore # (complaints about the overloaded signature)
self,
page_size: int = None,
max_results: int = None,
retry: "retries.Retry" = DEFAULT_RETRY,
timeout: float = None,
start_index: int = None,
job_retry: "retries.Retry" = DEFAULT_JOB_RETRY,
) -> Union["RowIterator", _EmptyRowIterator]:
"""Start the job and wait for it to complete and get the result.
Args:
page_size (Optional[int]):
The maximum number of rows in each page of results from this
request. Non-positive values are ignored.
max_results (Optional[int]):
The maximum total number of rows from this request.
retry (Optional[google.api_core.retry.Retry]):
How to retry the call that retrieves rows. This only
applies to making RPC calls. It isn't used to retry
failed jobs. This has a reasonable default that
should only be overridden with care. If the job state
is ``DONE``, retrying is aborted early even if the
results are not available, as this will not change
anymore.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
If multiple requests are made under the hood, ``timeout``
applies to each individual request.
start_index (Optional[int]):
The zero-based index of the starting row to read.
job_retry (Optional[google.api_core.retry.Retry]):
How to retry failed jobs. The default retries
rate-limit-exceeded errors. Passing ``None`` disables
job retry.
Not all jobs can be retried. If ``job_id`` was
provided to the query that created this job, then the
job returned by the query will not be retryable, and
an exception will be raised if non-``None``
non-default ``job_retry`` is also provided.
Returns:
google.cloud.bigquery.table.RowIterator:
Iterator of row data
:class:`~google.cloud.bigquery.table.Row`-s. During each
page, the iterator will have the ``total_rows`` attribute
set, which counts the total number of rows **in the result
set** (this is distinct from the total number of rows in the
current page: ``iterator.page.num_items``).
If the query is a special query that produces no results, e.g.
a DDL query, an ``_EmptyRowIterator`` instance is returned.
Raises:
google.cloud.exceptions.GoogleAPICallError:
If the job failed and retries aren't successful.
concurrent.futures.TimeoutError:
If the job did not complete in the given timeout.
TypeError:
If Non-``None`` and non-default ``job_retry`` is
provided and the job is not retryable.
"""
if self.dry_run:
return _EmptyRowIterator()
try:
retry_do_query = getattr(self, "_retry_do_query", None)
if retry_do_query is not None:
if job_retry is DEFAULT_JOB_RETRY:
job_retry = self._job_retry # type: ignore
else:
if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY:
raise TypeError(
"`job_retry` was provided, but this job is"
" not retryable, because a custom `job_id` was"
" provided to the query that created this job."
)
first = True
def do_get_result():
nonlocal first
if first:
first = False
else:
# Note that we won't get here if retry_do_query is
# None, because we won't use a retry.
# The orinal job is failed. Create a new one.
job = retry_do_query()
# If it's already failed, we might as well stop:
if job.done() and job.exception() is not None:
raise job.exception()
# Become the new job:
self.__dict__.clear()
self.__dict__.update(job.__dict__)
# This shouldn't be necessary, because once we have a good
# job, it should stay good,and we shouldn't have to retry.
# But let's be paranoid. :)
self._retry_do_query = retry_do_query
self._job_retry = job_retry
super(QueryJob, self).result(retry=retry, timeout=timeout)
# Since the job could already be "done" (e.g. got a finished job
# via client.get_job), the superclass call to done() might not
# set the self._query_results cache.
self._reload_query_results(retry=retry, timeout=timeout)
if retry_do_query is not None and job_retry is not None:
do_get_result = job_retry(do_get_result)
do_get_result()
.nox/system-3-10/lib/python3.10/site-packages/google/cloud/bigquery/job/query.py:1451:
args = (), kwargs = {}
target = functools.partial(<function QueryJob.result..do_get_result at 0x7fbec1c82d40>)
sleep_generator = <generator object exponential_sleep_generator at 0x7fbec1c05150>
@functools.wraps(func)
def retry_wrapped_func(*args, **kwargs):
"""A wrapper that calls target function with retry."""
target = functools.partial(func, *args, **kwargs)
sleep_generator = exponential_sleep_generator(
self._initial, self._maximum, multiplier=self._multiplier
)
return retry_target(
target,
self._predicate,
sleep_generator,
self._deadline,
on_error=on_error,
)
.nox/system-3-10/lib/python3.10/site-packages/google/api_core/retry.py:283:
target = functools.partial(<function QueryJob.result..do_get_result at 0x7fbec1c82d40>)
predicate = <function _job_should_retry at 0x7fbec236b010>
sleep_generator = <generator object exponential_sleep_generator at 0x7fbec1c05150>
deadline = 600.0, on_error = None
def retry_target(target, predicate, sleep_generator, deadline, on_error=None):
"""Call a function and retry if it fails.
This is the lowest-level retry helper. Generally, you'll use the
higher-level retry helper :class:`Retry`.
Args:
target(Callable): The function to call and retry. This must be a
nullary function - apply arguments with `functools.partial`.
predicate (Callable[Exception]): A callable used to determine if an
exception raised by the target should be considered retryable.
It should return True to retry or False otherwise.
sleep_generator (Iterable[float]): An infinite iterator that determines
how long to sleep between retries.
deadline (float): How long to keep retrying the target. The last sleep
period is shortened as necessary, so that the last retry runs at
``deadline`` (and not considerably beyond it).
on_error (Callable[Exception]): A function to call while processing a
retryable exception. Any error raised by this function will *not*
be caught.
Returns:
Any: the return value of the target function.
Raises:
google.api_core.RetryError: If the deadline is exceeded while retrying.
ValueError: If the sleep generator stops yielding values.
Exception: If the target raises a method that isn't retryable.
"""
if deadline is not None:
deadline_datetime = datetime_helpers.utcnow() + datetime.timedelta(
seconds=deadline
)
else:
deadline_datetime = None
last_exc = None
for sleep in sleep_generator:
try:
return target()
.nox/system-3-10/lib/python3.10/site-packages/google/api_core/retry.py:190:
def do_get_result():
nonlocal first
if first:
first = False
else:
# Note that we won't get here if retry_do_query is
# None, because we won't use a retry.
# The orinal job is failed. Create a new one.
job = retry_do_query()
# If it's already failed, we might as well stop:
if job.done() and job.exception() is not None:
raise job.exception()
# Become the new job:
self.__dict__.clear()
self.__dict__.update(job.__dict__)
# This shouldn't be necessary, because once we have a good
# job, it should stay good,and we shouldn't have to retry.
# But let's be paranoid. :)
self._retry_do_query = retry_do_query
self._job_retry = job_retry
super(QueryJob, self).result(retry=retry, timeout=timeout)
.nox/system-3-10/lib/python3.10/site-packages/google/cloud/bigquery/job/query.py:1441:
self = QueryJob<project=precise-truck-742, location=US, id=62c91570-5bf8-4f84-8f69-68d3e740605f>
retry = <google.api_core.retry.Retry object at 0x7fbec2353580>, timeout = None
def result( # type: ignore # (signature complaint)
self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
) -> "_AsyncJob":
"""Start the job and wait for it to complete and get the result.
Args:
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC. If the job state is ``DONE``, retrying is aborted
early, as the job will not change anymore.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
If multiple requests are made under the hood, ``timeout``
applies to each individual request.
Returns:
_AsyncJob: This instance.
Raises:
google.cloud.exceptions.GoogleAPICallError:
if the job failed.
concurrent.futures.TimeoutError:
if the job did not complete in the given timeout.
"""
if self.state is None:
self._begin(retry=retry, timeout=timeout)
kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
return super(_AsyncJob, self).result(timeout=timeout, **kwargs)
.nox/system-3-10/lib/python3.10/site-packages/google/cloud/bigquery/job/base.py:727:
self = QueryJob<project=precise-truck-742, location=US, id=62c91570-5bf8-4f84-8f69-68d3e740605f>
timeout = None, retry = <google.api_core.retry.Retry object at 0x7fbec2353cd0>
def result(self, timeout=None, retry=DEFAULT_RETRY):
"""Get the result of the operation, blocking if necessary.
Args:
timeout (int):
How long (in seconds) to wait for the operation to complete.
If None, wait indefinitely.
Returns:
google.protobuf.Message: The Operation's result.
Raises:
google.api_core.GoogleAPICallError: If the operation errors or if
the timeout is reached before the operation completes.
"""
kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
self._blocking_poll(timeout=timeout, **kwargs)
if self._exception is not None:
# pylint: disable=raising-bad-type
# Pylint doesn't recognize that this is valid in this case.
raise self._exception
E google.api_core.exceptions.GoogleAPICallError: 200 Job execution was cancelled: Job timed out after 0s
E
E Location: US
E Job ID: 62c91570-5bf8-4f84-8f69-68d3e740605f
.nox/system-3-10/lib/python3.10/site-packages/google/api_core/future/polling.py:137: GoogleAPICallError
During handling of the above exception, another exception occurred:
self = <system.test_gbq.TestReadGBQIntegration object at 0x7fbec1d79bd0>
project_id = 'precise-truck-742'
def test_timeout_configuration(self, project_id):
sql_statement = """
SELECT
SUM(bottles_sold) total_bottles,
UPPER(category_name) category_name,
magnitude,
liquor.zip_code zip_code
FROM `bigquery-public-data.iowa_liquor_sales.sales` liquor
JOIN `bigquery-public-data.geo_us_boundaries.zip_codes` zip_codes
ON liquor.zip_code = zip_codes.zip_code
JOIN `bigquery-public-data.noaa_historic_severe_storms.tornado_paths` tornados
ON liquor.date = tornados.storm_date
WHERE ST_INTERSECTS(tornado_path_geom, zip_code_geom)
GROUP BY category_name, magnitude, zip_code
ORDER BY magnitude ASC, total_bottles DESC
"""
configs = [
{"query": {"useQueryCache": False, "timeoutMs": 1}},
{"query": {"useQueryCache": False}, "jobTimeoutMs": 1},
]
for config in configs:
with pytest.raises(gbq.QueryTimeout):
gbq.read_gbq(
sql_statement,
project_id=project_id,
credentials=self.credentials,
configuration=config,
)
tests/system/test_gbq.py:496:
pandas_gbq/gbq.py:865: in read_gbq
final_df = connector.run_query(
pandas_gbq/gbq.py:496: in run_query
self.process_http_error(ex)
ex = GoogleAPICallError('Job execution was cancelled: Job timed out after 0s')
@staticmethod
def process_http_error(ex):
# See `BigQuery Troubleshooting Errors
# <https://cloud.google.com/bigquery/troubleshooting-errors>`__
raise GenericGBQException("Reason: {0}".format(ex))
E pandas_gbq.exceptions.GenericGBQException: Reason: 200 Job execution was cancelled: Job timed out after 0s
E
E Location: US
E Job ID: 62c91570-5bf8-4f84-8f69-68d3e740605f
pandas_gbq/gbq.py:381: GenericGBQException