1717import traceback
1818from html .parser import HTMLParser
1919from urllib .error import URLError
20- from urllib .request import HTTPSHandler , Request , build_opener
20+ from urllib .request import Request , urlopen
2121
2222import llnl .util .lang
2323import llnl .util .tty as tty
2626import spack
2727import spack .config
2828import spack .error
29- import spack .gcs_handler
30- import spack .s3_handler
3129import spack .url
3230import spack .util .crypto
3331import spack .util .gcs as gcs_util
3735from spack .util .executable import CommandNotFoundError , which
3836from spack .util .path import convert_to_posix_path
3937
40-
41- def _urlopen ():
42- s3 = spack .s3_handler .UrllibS3Handler ()
43- gcs = spack .gcs_handler .GCSHandler ()
44-
45- # One opener with HTTPS ssl enabled
46- with_ssl = build_opener (s3 , gcs , HTTPSHandler (context = ssl .create_default_context ()))
47-
48- # One opener with HTTPS ssl disabled
49- without_ssl = build_opener (s3 , gcs , HTTPSHandler (context = ssl ._create_unverified_context ()))
50-
51- # And dynamically dispatch based on the config:verify_ssl.
52- def dispatch_open (* args , ** kwargs ):
53- opener = with_ssl if spack .config .get ("config:verify_ssl" , True ) else without_ssl
54- return opener .open (* args , ** kwargs )
55-
56- return dispatch_open
57-
58-
59- #: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
60- urlopen = llnl .util .lang .Singleton (_urlopen )
61-
#: User-Agent header value sent in Spack's HTTP requests; embeds the running
#: Spack version so servers can identify the client (e.g. "Spackbot/0.19.0").
SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
6440
@@ -83,12 +59,43 @@ def handle_starttag(self, tag, attrs):
8359 self .links .append (val )
8460
8561
def uses_ssl(parsed_url):
    """Return True when fetching from ``parsed_url`` goes over SSL/TLS.

    ``https`` always does.  ``s3`` does unless a custom, non-https
    ``S3_ENDPOINT_URL`` is configured in the environment.  ``gs`` (Google
    Cloud Storage) always uses https.  Everything else is considered
    non-SSL.
    """
    scheme = parsed_url.scheme

    if scheme == "https":
        return True

    if scheme == "s3":
        endpoint_url = os.environ.get("S3_ENDPOINT_URL")
        # No custom endpoint means the default AWS endpoint, which is https.
        if not endpoint_url:
            return True
        # A custom endpoint uses SSL only when it is itself https
        # (an endpoint URL with no scheme defaults to https here).
        return url_util.parse(endpoint_url, scheme="https").scheme == "https"

    if scheme == "gs":
        tty.debug("(uses_ssl) GCS Blob is https")
        return True

    return False
79+
80+
8681def read_from_url (url , accept_content_type = None ):
8782 url = url_util .parse (url )
83+ context = None
8884
8985 # Timeout in seconds for web requests
9086 timeout = spack .config .get ("config:connect_timeout" , 10 )
9187
88+ # Don't even bother with a context unless the URL scheme is one that uses
89+ # SSL certs.
90+ if uses_ssl (url ):
91+ if spack .config .get ("config:verify_ssl" ):
92+ # User wants SSL verification, and it *can* be provided.
93+ context = ssl .create_default_context ()
94+ else :
95+ # User has explicitly indicated that they do not want SSL
96+ # verification.
97+ context = ssl ._create_unverified_context ()
98+
9299 url_scheme = url .scheme
93100 url = url_util .format (url )
94101 if sys .platform == "win32" and url_scheme == "file" :
@@ -104,15 +111,15 @@ def read_from_url(url, accept_content_type=None):
104111 # one round-trip. However, most servers seem to ignore the header
105112 # if you ask for a tarball with Accept: text/html.
106113 req .get_method = lambda : "HEAD"
107- resp = urlopen (req , timeout = timeout )
114+ resp = _urlopen (req , timeout = timeout , context = context )
108115
109116 content_type = get_header (resp .headers , "Content-type" )
110117
111118 # Do the real GET request when we know it's just HTML.
112119 req .get_method = lambda : "GET"
113120
114121 try :
115- response = urlopen (req , timeout = timeout )
122+ response = _urlopen (req , timeout = timeout , context = context )
116123 except URLError as err :
117124 raise SpackWebError ("Download failed: {ERROR}" .format (ERROR = str (err )))
118125
@@ -344,6 +351,12 @@ def url_exists(url, curl=None):
344351 Simple Storage Service (`s3`) URLs; otherwise, the configured fetch
345352 method defined by `config:url_fetch_method` is used.
346353
354+ If the method is `curl`, it also uses the following configuration option:
355+
356+ * config:verify_ssl (str): Perform SSL verification
357+
358+ Otherwise, `urllib` will be used.
359+
347360 Arguments:
348361 url (str): URL whose existence is being checked
349362 curl (spack.util.executable.Executable or None): (optional) curl
@@ -354,11 +367,31 @@ def url_exists(url, curl=None):
354367 tty .debug ("Checking existence of {0}" .format (url ))
355368 url_result = url_util .parse (url )
356369
357- # Use curl if configured to do so
358- use_curl = spack .config .get (
359- "config:url_fetch_method" , "urllib"
360- ) == "curl" and url_result .scheme not in ("gs" , "s3" )
361- if use_curl :
370+ # Check if a local file
371+ local_path = url_util .local_file_path (url_result )
372+ if local_path :
373+ return os .path .exists (local_path )
374+
375+ # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch
376+ if url_result .scheme == "s3" :
377+ # Check for URL-specific connection information
378+ s3 = s3_util .get_s3_session (url_result , method = "fetch" )
379+
380+ try :
381+ s3 .get_object (Bucket = url_result .netloc , Key = url_result .path .lstrip ("/" ))
382+ return True
383+ except s3 .ClientError as err :
384+ if err .response ["Error" ]["Code" ] == "NoSuchKey" :
385+ return False
386+ raise err
387+
388+ # Check if Google Storage .. urllib-based fetch
389+ if url_result .scheme == "gs" :
390+ gcs = gcs_util .GCSBlob (url_result )
391+ return gcs .exists ()
392+
393+ # Otherwise, use the configured fetch method
394+ if spack .config .get ("config:url_fetch_method" ) == "curl" :
362395 curl_exe = _curl (curl )
363396 if not curl_exe :
364397 return False
@@ -371,14 +404,13 @@ def url_exists(url, curl=None):
371404 _ = curl_exe (* curl_args , fail_on_error = False , output = os .devnull )
372405 return curl_exe .returncode == 0
373406
374- # Otherwise use urllib.
407+ # If we get here, then the only other fetch method option is urllib.
408+ # So try to "read" from the URL and assume that *any* non-throwing
409+ # response contains the resource represented by the URL.
375410 try :
376- urlopen (
377- Request (url , method = "HEAD" , headers = {"User-Agent" : SPACK_USER_AGENT }),
378- timeout = spack .config .get ("config:connect_timeout" , 10 ),
379- )
411+ read_from_url (url )
380412 return True
381- except URLError as e :
413+ except ( SpackWebError , URLError ) as e :
382414 tty .debug ("Failure reading URL: " + str (e ))
383415 return False
384416
@@ -661,6 +693,35 @@ def _spider(url, collect_nested):
661693 return pages , links
662694
663695
def _urlopen(req, *args, **kwargs):
    """Open a URL, dispatching to the right opener for its scheme.

    Plain schemes go through ``urllib.request.urlopen``; ``s3`` and ``gs``
    URLs are routed to Spack's custom handlers, imported lazily so their
    dependencies are only pulled in when actually needed.

    Arguments:
        req: a ``urllib.request.Request`` instance or a plain URL string.
        *args: forwarded to the selected opener.
        **kwargs: forwarded to the selected opener.  If the opener rejects
            the ``context`` keyword (old Python versions, or the custom
            s3/gs handlers), the call is retried once without it.

    Returns:
        The response object produced by the selected opener.
    """
    # A Request object knows its full URL; a plain string *is* the URL.
    url = req
    try:
        url = url.get_full_url()
    except AttributeError:
        pass

    scheme = url_util.parse(url).scheme
    opener = urlopen
    if scheme == "s3":
        import spack.s3_handler

        opener = spack.s3_handler.open
    elif scheme == "gs":
        import spack.gcs_handler

        opener = spack.gcs_handler.gcs_open

    try:
        return opener(req, *args, **kwargs)
    except TypeError as err:
        # Retry without 'context' only when that keyword is what the opener
        # choked on; any other TypeError is a real error and must propagate.
        if "context" in kwargs and "context" in str(err):
            del kwargs["context"]
            return opener(req, *args, **kwargs)
        raise
724+
664725def find_versions_of_archive (
665726 archive_urls , list_url = None , list_depth = 0 , concurrency = 32 , reference_package = None
666727):
0 commit comments