Skip to content

test --remote-data failures #2290

@astrofrog

Description

@astrofrog

There are currently failures with all Python versions when running tests with --remote-data. Here is a sample:

=================================== FAILURES ===================================
_____________________________ test_outgoing_fails ______________________________
[gw1] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
def test_outgoing_fails():
        with pytest.raises(IOError):
>           urlopen('http://www.astropy.org')
E           Failed: DID NOT RAISE
astropy/tests/tests/test_socketblocker.py:12: Failed
______________ [doctest] astropy.time.core.Time.get_delta_ut1_utc ______________
[gw1] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
778             >>> delta, status = t.get_delta_ut1_utc(return_status=True)
779             >>> status == TIME_BEFORE_IERS_RANGE
780             array([ True, False], dtype=bool)
781 
782         To use an updated IERS A bulletin to calculate UT1-UTC
783         (see also `~astropy.utils.iers`)::
784 
785             >>> from astropy.utils.iers import IERS_A, IERS_A_URL
786             >>> from astropy.utils.data import download_file
787             >>> iers_a_file = download_file(IERS_A_URL,
Expected nothing
Got:
    Downloading http://maia.usno.navy.mil/ser7/finals2000A.all [Done]
/tmp/astropy-test-TGux7f/lib.linux-x86_64-2.6/astropy/time/core.py:787: DocTestFailure
_____________________________ test_download_cache ______________________________
[gw7] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
@remote_data
    def test_download_cache():

        from ..data import download_file, clear_download_cache

        fnout = download_file(TESTURL, cache=True)
>       assert os.path.isfile(fnout)
astropy/utils/tests/test_data.py:48: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
hashorurl = 'http://www.google.com/index.html'
    def clear_download_cache(hashorurl=None):
        """ Clears the data file cache by deleting the local file(s).

        Parameters
        ----------
        hashorurl : str or None
            If None, the whole cache is cleared.  Otherwise, either specifies a
            hash for the cached file that is supposed to be deleted, or a URL that
            has previously been downloaded to the cache.

        Raises
        ------
        OSEerror
            If the requested filename is not present in the data directory.

        """

        try:
            dldir, urlmapfn = _get_download_cache_locs()
        except (IOError, OSError) as e:
            msg = 'Not clearing data cache - cache inacessable due to '
            estr = '' if len(e.args) < 1 else (': ' + str(e))
            warn(CacheMissingWarning(msg + e.__class__.__name__ + estr))
            return

        _acquire_download_cache_lock()
        try:
            if hashorurl is None:
                if os.path.exists(dldir):
                    shutil.rmtree(dldir)
                if os.path.exists(urlmapfn):
                    os.unlink(urlmapfn)
            else:
                with _open_shelve(urlmapfn, True) as url2hash:
                    filepath = os.path.join(dldir, hashorurl)
                    assert _is_inside(filepath, dldir), \
                           ("attempted to use clear_download_cache on a location" +
                            " that's not inside the data cache directory")

                    if os.path.exists(filepath):
                        for k, v in list(six.iteritems(url2hash)):
                            if v == filepath:
                                del url2hash[k]
                        os.unlink(filepath)

>                   elif hashorurl in url2hash:
astropy/utils/data.py:1089: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = <[ValueError("invalid operation on closed shelf") raised in repr()] DbfilenameShelf object at 0xa08ed40>
key = 'http://www.google.com/index.html'
    def __contains__(self, key):
>       return key in self.dict
/usr/lib/python2.6/shelve.py:110: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = {}, key = 'http://www.google.com/index.html'
    def __contains__(self, key):
        try:
>           self[key]
/home/travis/virtualenv/python2.6/lib/python2.6/_abcoll.py:342: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = {}, key = 'http://www.google.com/index.html'
    def __getitem__(self, key):
        self._checkOpen()
>       return _DeadlockWrap(lambda: self.db[key])  # self.db[key]
/usr/lib/python2.6/bsddb/__init__.py:270: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
function = <function <lambda> at 0xc353ed8>, _args = (), _kwargs = {}
sleeptime = 0.0078125, max_retries = -1
    def DeadlockWrap(function, *_args, **_kwargs):
        """DeadlockWrap(function, *_args, **_kwargs) - automatically retries
        function in case of a database deadlock.

        This is a function intended to be used to wrap database calls such
        that they perform retrys with exponentially backing off sleeps in
        between when a DBLockDeadlockError exception is raised.

        A 'max_retries' parameter may optionally be passed to prevent it
        from retrying forever (in which case the exception will be reraised).

            d = DB(...)
            d.open(...)
            DeadlockWrap(d.put, "foo", data="bar")  # set key "foo" to "bar"
        """
        sleeptime = _deadlock_MinSleepTime
        max_retries = _kwargs.get('max_retries', -1)
        if 'max_retries' in _kwargs:
            del _kwargs['max_retries']
        while True:
            try:
>               return function(*_args, **_kwargs)
/usr/lib/python2.6/bsddb/dbutils.py:68: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
>   return _DeadlockWrap(lambda: self.db[key])  # self.db[key]
E   TypeError: String or Integer object expected for key, unicode found
/usr/lib/python2.6/bsddb/__init__.py:270: TypeError
------------------------------- Captured stdout --------------------------------
Downloading http://www.google.com/index.html [Done]
______________________________ test_find_by_hash _______________________________
[gw7] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
@remote_data
    def test_find_by_hash():
        from urllib2 import URLError
        from ..data import get_pkg_data_filename
E       AttributeError: 'raises' object has no attribute '__exit__'
astropy/utils/tests/test_data.py:88: AttributeError
_________________________ test_data_noastropy_fallback _________________________
[gw7] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
monkeypatch = <_pytest.monkeypatch.monkeypatch instance at 0xa23dea8>
recwarn = <_pytest.recwarn.WarningsRecorder instance at 0xa23da70>

    @remote_data
    def test_data_noastropy_fallback(monkeypatch, recwarn):
        """
        Tests to make sure the default behavior when the cache directory can't
        be located is correct
        """

        from .. import data
        from ...config import paths

        # needed for testing the *real* lock at the end
        lockdir = os.path.join(_get_download_cache_locs()[0], 'lock')

        #better yet, set the configuration to make sure the temp files are deleted
        data.DELETE_TEMPORARY_DOWNLOADS_AT_EXIT.set(True)

        #make sure the config and cache directories are not searched
        monkeypatch.setenv('XDG_CONFIG_HOME', 'foo')
        monkeypatch.delenv('XDG_CONFIG_HOME')
        monkeypatch.setenv('XDG_CACHE_HOME', 'bar')
        monkeypatch.delenv('XDG_CACHE_HOME')

        # make sure the _find_or_create_astropy_dir function fails as though the
        # astropy dir could not be accessed
        def osraiser(dirnm, linkto):
            raise OSError
        monkeypatch.setattr(paths, '_find_or_create_astropy_dir', osraiser)
E       AttributeError: 'raises' object has no attribute '__exit__'
astropy/utils/tests/test_data.py:212: AttributeError
______________________________ test_find_by_hash _______________________________
[gw7] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
@remote_data
    def test_find_by_hash():
        from urllib2 import URLError
        from ..data import get_pkg_data_filename
E       AttributeError: 'raises' object has no attribute '__exit__'
astropy/utils/tests/test_data.py:88: AttributeError
_________________________ test_data_noastropy_fallback _________________________
[gw7] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
monkeypatch = <_pytest.monkeypatch.monkeypatch instance at 0xa67f128>
recwarn = <_pytest.recwarn.WarningsRecorder instance at 0x9eb6ea8>

    @remote_data
    def test_data_noastropy_fallback(monkeypatch, recwarn):
        """
        Tests to make sure the default behavior when the cache directory can't
        be located is correct
        """

        from .. import data
        from ...config import paths

        # needed for testing the *real* lock at the end
        lockdir = os.path.join(_get_download_cache_locs()[0], 'lock')

        #better yet, set the configuration to make sure the temp files are deleted
        data.DELETE_TEMPORARY_DOWNLOADS_AT_EXIT.set(True)

        #make sure the config and cache directories are not searched
        monkeypatch.setenv('XDG_CONFIG_HOME', 'foo')
        monkeypatch.delenv('XDG_CONFIG_HOME')
        monkeypatch.setenv('XDG_CACHE_HOME', 'bar')
        monkeypatch.delenv('XDG_CACHE_HOME')

        # make sure the _find_or_create_astropy_dir function fails as though the
        # astropy dir could not be accessed
        def osraiser(dirnm, linkto):
            raise OSError
        monkeypatch.setattr(paths, '_find_or_create_astropy_dir', osraiser)
E       AttributeError: 'raises' object has no attribute '__exit__'
astropy/utils/tests/test_data.py:212: AttributeError
________________________________ test_basic_db _________________________________
[gw6] linux2 -- Python 2.6.9 /home/travis/virtualenv/python2.6/bin/python
@remote_data
    def test_basic_db():
        """Read dummy ``basic.json`` database to test underlying database
        functionality.

        """
>       basic_db = vos_catalog.get_remote_catalog_db('basic')
astropy/vo/client/tests/test_vo.py:56: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
dbname = 'basic', cache = True, verbose = True
    def get_remote_catalog_db(dbname, cache=True, verbose=True):
        """Get a database of VO services (which is a JSON file) from a remote
        location.

        Parameters
        ----------
        dbname : str
            Prefix of JSON file to download from
            ``astropy.vo.client.vos_catalog.BASEURL``.

        cache : bool
            Use caching for VO Service database. Access to actual VO
            websites referenced by the database still needs internet
            connection.

        verbose : bool
            Show download progress bars.

        Returns
        -------
        obj : `VOSDatabase` object
            A database of VO services.

        """
        with get_readable_fileobj(BASEURL() + dbname + '.json',
                                  encoding='utf8', cache=cache,
>                                 show_progress=verbose) as fd:
            tree = json.load(fd)
astropy/vo/client/vos_catalog.py:214: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = <contextlib.GeneratorContextManager object at 0x9b0e7d0>
    def __enter__(self):
        try:
>           return self.gen.next()
/usr/lib/python2.6/contextlib.py:16: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
name_or_obj = 'http://stsdas.stsci.edu/astrolib/vo_databases/basic.json'
encoding = 'utf8', cache = True, show_progress = True, remote_timeout = 3.0
    @contextlib.contextmanager
    def get_readable_fileobj(name_or_obj, encoding=None, cache=False,
                             show_progress=True, remote_timeout=None):
        """
        Given a filename or a readable file-like object, return a context
        manager that yields a readable file-like object.

        This supports passing filenames, URLs, and readable file-like
        objects, any of which can be compressed in gzip or bzip2.

        Notes
        -----

        This function is a context manager, and should be used for example
        as::

            with get_readable_fileobj('file.dat') as f:
                contents = f.read()

        Parameters
        ----------
        name_or_obj : str or file-like object
            The filename of the file to access (if given as a string), or
            the file-like object to access.

            If a file-like object, it must be opened in binary mode.

        encoding : str, optional
            When `None` (default), returns a file-like object with a
            `read` method that on Python 2.x returns `bytes` objects and
            on Python 3.x returns `str` (`unicode`) objects, using
            `locale.getpreferredencoding()` as an encoding.  This matches
            the default behavior of the built-in `open` when no `mode`
            argument is provided.

            When `'binary'`, returns a file-like object where its `read`
            method returns `bytes` objects.

            When another string, it is the name of an encoding, and the
            file-like object's `read` method will return `str` (`unicode`)
            objects, decoded from binary using the given encoding.

        cache : bool, optional
            Whether to cache the contents of remote URLs.

        show_progress : bool, optional
            Whether to display a progress bar if the file is downloaded
            from a remote server.  Default is `True`.

        remote_timeout : float
            Timeout for remote requests in seconds (default is the configurable
            REMOTE_TIMEOUT, which is 3s by default)

        Returns
        -------
        file : readable file-like object
        """

        # close_fds is a list of file handles created by this function
        # that need to be closed.  We don't want to always just close the
        # returned file handle, because it may simply be the file handle
        # passed in.  In that case it is not the responsibility of this
        # function to close it: doing so could result in a "double close"
        # and an "invalid file descriptor" exception.
        close_fds = []
        delete_fds = []

        if remote_timeout is None:
            # use configfile default
            remote_timeout = REMOTE_TIMEOUT()

        # Get a file object to the content
        if isinstance(name_or_obj, six.string_types):
            if _is_url(name_or_obj):
                name_or_obj = download_file(
                    name_or_obj, cache=cache, show_progress=show_progress,
>                   timeout=remote_timeout)
astropy/utils/data.py:169: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
remote_url = 'http://stsdas.stsci.edu/astrolib/vo_databases/basic.json'
cache = True, show_progress = True, timeout = 3.0
    def download_file(remote_url, cache=False, show_progress=True, timeout=REMOTE_TIMEOUT()):
        """
        Accepts a URL, downloads and optionally caches the result
        returning the filename, with a name determined by the file's MD5
        hash. If ``cache=True`` and the file is present in the cache, just
        returns the filename.

        Parameters
        ----------
        remote_url : str
            The URL of the file to download

        cache : bool, optional
            Whether to use the cache

        show_progress : bool, optional
            Whether to display a progress bar during the download (default
            is `True`)

        Returns
        -------
        local_path : str
            Returns the local path that the file was download to.

        Raises
        ------
        urllib2.URLError, urllib.error.URLError
            Whenever there's a problem getting the remote file.
        """

        from ..utils.console import ProgressBarOrSpinner

        missing_cache = False

        if cache:
            try:
                dldir, urlmapfn = _get_download_cache_locs()
            except (IOError, OSError) as e:
                msg = 'Remote data cache could not be accessed due to '
                estr = '' if len(e.args) < 1 else (': ' + str(e))
                warn(CacheMissingWarning(msg + e.__class__.__name__ + estr))
                cache = False
                missing_cache = True  # indicates that the cache is missing to raise a warning later
        try:
            if cache:
                # We don't need to acquire the lock here, since we are only reading
                with _open_shelve(urlmapfn, True) as url2hash:
                    if str(remote_url) in url2hash:
                        return url2hash[str(remote_url)]

            with contextlib.closing(urllib.request.urlopen(
                    remote_url, timeout=timeout)) as remote:
                #keep a hash to rename the local file to the hashed name
                hash = hashlib.md5()

                info = remote.info()
                if 'Content-Length' in info:
                    try:
                        size = int(info['Content-Length'])
                    except ValueError:
                        size = None
                else:
                    size = None

                if size is not None:
                    check_free_space_in_dir(gettempdir(), size)
                    if cache:
                        check_free_space_in_dir(dldir, size)

                if show_progress:
                    progress_stream = sys.stdout
                else:
                    progress_stream = io.StringIO()

                dlmsg = "Downloading {0}".format(remote_url)
                with ProgressBarOrSpinner(size, dlmsg, file=progress_stream) as p:
                    with NamedTemporaryFile(delete=False) as f:
                        try:
                            bytes_read = 0
                            block = remote.read(DOWNLOAD_CACHE_BLOCK_SIZE())
                            while block:
                                f.write(block)
                                hash.update(block)
                                bytes_read += len(block)
                                p.update(bytes_read)
                                block = remote.read(DOWNLOAD_CACHE_BLOCK_SIZE())
                        except:
                            if os.path.exists(f.name):
                                os.remove(f.name)
                            raise

            if cache:
                _acquire_download_cache_lock()
                try:
                    with _open_shelve(urlmapfn, True) as url2hash:
                        # We check now to see if another process has
                        # inadvertently written the file underneath us
                        # already
                        if str(remote_url) in url2hash:
                            return url2hash[str(remote_url)]
                        local_path = os.path.join(dldir, hash.hexdigest())
                        shutil.move(f.name, local_path)
                        url2hash[str(remote_url)] = local_path
                finally:
                    _release_download_cache_lock()
            else:
                local_path = f.name
                if missing_cache:
                    msg = ('File downloaded to temporary location due to problem '
                           'with cache directory and will not be cached.')
                    warn(CacheMissingWarning(msg, local_path))
                if DELETE_TEMPORARY_DOWNLOADS_AT_EXIT():
                    global _tempfilestodel
                    _tempfilestodel.append(local_path)
        except urllib.error.URLError as e:
            if hasattr(e, 'reason') and hasattr(e.reason, 'errno') and e.reason.errno == 8:
                e.reason.strerror = e.reason.strerror + '. requested URL: ' + remote_url
                e.reason.args = (e.reason.errno, e.reason.strerror)
>           raise e
E           HTTPError: HTTP Error 404: Not Found
astropy/utils/data.py:964: HTTPError

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions