Skip to content

Commit 46eabb0

Browse files
committed
move follow_redirects, expect_requests_*, and HandlerTest here from granary
1 parent af4880b commit 46eabb0

File tree

3 files changed

+252
-57
lines changed

3 files changed

+252
-57
lines changed

test/test_util.py

+41
Original file line numberDiff line numberDiff line change
@@ -729,3 +729,44 @@ def test_base_url(self):
729729
('http://site/path/', 'http://site/path/leaf?query#frag'),
730730
):
731731
self.assertEquals(expected, util.base_url(url))
732+
733+
def test_follow_redirects(self):
  """follow_redirects() resolves via HEAD and only refetches on a cache miss."""
  source = 'http://will/redirect'
  target = 'http://final/url'

  # only two HEAD requests are expected for the three calls below: the last
  # call reuses the cache and must not hit the network.
  for _ in range(2):
    self.expect_requests_head(source, redirected_url=target)
  self.mox.ReplayAll()

  cache = util.CacheDict()
  first = util.follow_redirects(source, cache=cache)
  self.assert_equals(target, first.url)

  # the resolved response is stored under 'R ' + the original URL
  self.assertEquals(target, cache['R ' + source].url)

  # no cache passed, so this call has to refetch
  uncached = util.follow_redirects(source)
  self.assert_equals(target, uncached.url)

  # same cache as the first call, so this one shouldn't refetch
  cached = util.follow_redirects(source, cache=cache)
  self.assert_equals(target, cached.url)
755+
756+
def test_follow_redirects_with_refresh_header(self):
  """A Refresh response header is followed like a redirect."""
  request_headers = {'x': 'y'}
  refresh_header = {'refresh': '0; url=http://refresh'}

  # first HEAD returns a Refresh header; the second, to the refresh target,
  # must be sent with the same request headers.
  self.expect_requests_head('http://will/redirect', headers=request_headers,
                            response_headers=refresh_header)
  self.expect_requests_head('http://refresh', headers=request_headers,
                            redirected_url='http://final')
  self.mox.ReplayAll()

  cache = util.CacheDict()
  resolved = util.follow_redirects('http://will/redirect', cache=cache,
                                   headers=request_headers)
  self.assert_equals('http://final', resolved.url)
768+
769+
def test_follow_redirects_defaults_scheme_to_http(self):
  """A URL without a scheme is requested as http://..."""
  # the expectation is on the http:// form even though 'foo/bar' is passed in
  self.expect_requests_head('http://foo/bar', redirected_url='http://final')
  self.mox.ReplayAll()

  resolved = util.follow_redirects('foo/bar')
  self.assert_equals('http://final', resolved.url)

testutil.py

+145-57
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import urlparse
1919

2020
import appengine_config
21+
import requests
2122
import webapp2
2223

2324
from google.appengine.datastore import datastore_stub_util
@@ -40,79 +41,120 @@ def get_task_eta(task):
4041
float(dict(task['headers'])['X-AppEngine-TaskETA']))
4142

4243

43-
class HandlerTest(mox.MoxTestBase):
44-
"""Base test class for webapp2 request handlers.
45-
46-
Uses App Engine's testbed to set up API stubs:
47-
http://code.google.com/appengine/docs/python/tools/localunittesting.html
48-
49-
Attributes:
50-
application: WSGIApplication
51-
handler: webapp2.RequestHandler
44+
class UrlopenResult(object):
45+
"""A fake urllib2.urlopen() result object. Also works for urlfetch.fetch().
5246
"""
47+
def __init__(self, status_code, content, url=None, headers=None):
  """Sets up the canned response.

  Args:
    status_code: value returned by getcode()
    content: string response body. Wrapped in a StringIO so that it can be
      consumed with read().
    url: optional, value returned by geturl()
    headers: optional dict of response headers, used by info(). Defaults to
      a new empty dict.
  """
  self.status_code = status_code
  self.content = StringIO.StringIO(content)
  self.url = url
  # a {} default in the signature would be one dict shared by every instance
  # created without headers, so mutating one result's headers would leak
  # into all the others. create a fresh dict per instance instead.
  self.headers = {} if headers is None else headers
5352

54-
class UrlopenResult(object):
55-
"""A fake urllib2.urlopen() result object. Also works for urlfetch.fetch().
56-
"""
57-
def __init__(self, status_code, content, url=None, headers={}):
58-
self.status_code = status_code
59-
self.content = StringIO.StringIO(content)
60-
self.url = url
61-
self.headers = headers
53+
def read(self, length=-1):
  """Reads from the response body, continuing where the last read stopped.

  Args:
    length: passed through to the underlying buffer's read(). The default,
      -1, reads everything that's left.
  """
  body = self.content
  return body.read(length)
6255

63-
def read(self, length=-1):
64-
return self.content.read(length)
56+
def getcode(self):
  """Returns the status code this result was constructed with."""
  code = self.status_code
  return code
6558

66-
def getcode(self):
67-
return self.status_code
59+
def geturl(self):
  """Returns the URL this result was constructed with, or None if unset."""
  location = self.url
  return location
6861

69-
def geturl(self):
70-
return self.url
62+
def info(self):
  """Returns the response headers as an rfc822.Message.

  The headers dict is serialized to 'name: value' lines, one per header, and
  parsed back by rfc822.Message.
  """
  header_lines = ['%s: %s' % (name, value)
                  for name, value in self.headers.items()]
  serialized = '\n'.join(header_lines)
  return rfc822.Message(StringIO.StringIO(serialized))
7165

72-
def info(self):
73-
return rfc822.Message(StringIO.StringIO(
74-
'\n'.join('%s: %s' % item for item in self.headers.items())))
7566

67+
class TestCase(mox.MoxTestBase):
68+
"""Test case class with lots of extra helpers."""
7669

7770
def setUp(self):
78-
super(HandlerTest, self).setUp()
71+
super(TestCase, self).setUp()
72+
for method in 'get', 'post':
73+
self.mox.StubOutWithMock(requests, method, use_mock_anything=True)
74+
self.stub_requests_head()
7975

80-
logging.getLogger().removeHandler(appengine_config.ereporter_logging_handler)
76+
self.mox.StubOutWithMock(urllib2, 'urlopen')
8177

82-
os.environ['APPLICATION_ID'] = 'app_id'
83-
self.current_user_id = '123'
84-
self.current_user_email = '[email protected]'
78+
# set time zone to UTC so that tests don't depend on local time zone
79+
os.environ['TZ'] = 'UTC'
8580

86-
self.testbed = testbed.Testbed()
87-
self.testbed.setup_env(user_id=self.current_user_id,
88-
user_email=self.current_user_email)
89-
self.testbed.activate()
81+
def stub_requests_head(self):
82+
"""Automatically return 200 to outgoing HEAD requests."""
83+
def fake_head(url, **kwargs):
84+
resp = requests.Response()
85+
resp.url = url
86+
if '.' in url or url.startswith('http'):
87+
resp.headers['content-type'] = 'text/html; charset=UTF-8'
88+
resp.status_code = 200
89+
else:
90+
resp.status_code = 404
91+
return resp
92+
self.mox.stubs.Set(requests, 'head', fake_head)
9093

91-
hrd_policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=.5)
92-
self.testbed.init_datastore_v3_stub(consistency_policy=hrd_policy)
93-
self.testbed.init_taskqueue_stub(root_path='.')
94-
self.testbed.init_user_stub()
95-
self.testbed.init_mail_stub()
96-
self.testbed.init_memcache_stub()
97-
self.testbed.init_logservice_stub()
94+
self._is_head_mocked = False # expect_requests_head() sets this to True
9895

99-
self.mox.StubOutWithMock(urllib2, 'urlopen')
96+
def unstub_requests_head(self):
  """Mock outgoing HEAD requests so they must be expected individually.

  Does nothing if requests.head has already been mocked for this test, so
  it's safe to call more than once.
  """
  if self._is_head_mocked:
    return

  self.mox.StubOutWithMock(requests, 'head', use_mock_anything=True)
  self._is_head_mocked = True
100101

101-
# unofficial API, whee! this is so we can call
102-
# TaskQueueServiceStub.GetTasks() in tests. see
103-
# google/appengine/api/taskqueue/taskqueue_stub.py
104-
self.taskqueue_stub = self.testbed.get_stub('taskqueue')
102+
def expect_requests_head(self, *args, **kwargs):
  """Expects a requests.head() call. See _expect_requests_call() for args.

  Switches requests.head over to a strict mock first, so from here on every
  HEAD request in this test has to be expected explicitly.

  Returns:
    whatever _expect_requests_call() returns
  """
  self.unstub_requests_head()
  # requests.head must be looked up *after* the line above so that we get
  # the mock, not whatever was installed before it.
  expectation = self._expect_requests_call(method=requests.head,
                                           *args, **kwargs)
  return expectation
105105

106-
self.request = webapp2.Request.blank('/')
107-
self.response = webapp2.Response()
108-
self.handler = webapp2.RequestHandler(self.request, self.response)
106+
def expect_requests_get(self, *args, **kwargs):
  """Expects a requests.get() call. See _expect_requests_call() for args.

  Returns:
    whatever _expect_requests_call() returns
  """
  expectation = self._expect_requests_call(method=requests.get,
                                           *args, **kwargs)
  return expectation
109108

110-
# set time zone to UTC so that tests don't depend on local time zone
111-
os.environ['TZ'] = 'UTC'
109+
def expect_requests_post(self, *args, **kwargs):
  """Expects a requests.post() call. See _expect_requests_call() for args.

  Returns:
    whatever _expect_requests_call() returns
  """
  expectation = self._expect_requests_call(method=requests.post,
                                           *args, **kwargs)
  return expectation
112111

113-
def tearDown(self):
114-
self.testbed.deactivate()
115-
super(HandlerTest, self).tearDown()
112+
def _expect_requests_call(self, url, response='', status_code=200,
                          content_type='text/html', method=requests.get,
                          redirected_url=None, response_headers=None,
                          **kwargs):
  """Records an expected call to a mocked requests function.

  Builds a canned requests.Response and sets it as the return value of the
  expected method(url, **kwargs) call.

  Args:
    url: string, the URL the call is expected to be made with
    response: str or unicode response body. unicode is encoded as UTF-8.
    status_code: integer, the canned response's status code
    content_type: string, the canned response's content-type header
    method: the mocked requests function to record the expectation on, e.g.
      requests.get. NOTE(review): the default is bound when this function is
      defined, which looks like it would be before setUp() installs the mock,
      so callers should always pass it explicitly, as expect_requests_*() do.
    redirected_url: string URL or sequence of string URLs for multiple
      redirects. The last one becomes the response's url; url and the
      earlier ones are added to its history, in order.
    response_headers: optional dict of extra response headers. Applied after
      content_type, so it can override it.
    **kwargs: keyword arguments the call is expected to be made with.
      timeout defaults to appengine_config.HTTP_TIMEOUT, and allow_redirects
      is forced to True for requests.head. files, if provided and non-empty,
      is a dict mapping name to expected contents; it is matched by reading
      the actual file-like values and comparing them.

  Returns:
    the recorded call, with AndReturn() already set to the canned response
  """
  resp = requests.Response()

  resp._text = response
  resp._content = (response.encode('utf-8') if isinstance(response, unicode)
                   else response)
  resp.encoding = 'utf-8'

  resp.url = url
  if redirected_url is not None:
    if isinstance(redirected_url, basestring):
      redirected_url = [redirected_url]
    assert isinstance(redirected_url, (list, tuple))
    # normalize to a list. tuples pass the assert above, but [url] + a tuple
    # slice raises TypeError.
    redirected_url = list(redirected_url)
    resp.url = redirected_url[-1]
    for hop_url in [url] + redirected_url[:-1]:
      hop = requests.Response()
      hop.url = hop_url
      resp.history.append(hop)

  resp.status_code = status_code
  resp.headers['content-type'] = content_type
  if response_headers is not None:
    resp.headers.update(response_headers)

  kwargs.setdefault('timeout', appengine_config.HTTP_TIMEOUT)
  if method is requests.head:
    kwargs['allow_redirects'] = True

  files = kwargs.get('files')
  if files:
    def check_files(actual):
      # compare sorted so that the check doesn't depend on dict ordering
      self.assertEqual(sorted(files.keys()), sorted(actual.keys()))
      for name, expected in files.items():
        self.assertEqual(expected, actual[name].read())
      return True
    kwargs['files'] = mox.Func(check_files)

  call = method(url, **kwargs)
  call.AndReturn(resp)
  return call
116158

117159
def expect_urlopen(self, url, response=None, status=200, data=None,
118160
headers=None, response_headers={}, **kwargs):
@@ -170,8 +212,8 @@ def check_request(req):
170212
call.AndRaise(urllib2.HTTPError('url', status, 'message',
171213
response_headers, response))
172214
elif response is not None:
173-
call.AndReturn(self.UrlopenResult(status, response, url=url,
174-
headers=response_headers))
215+
call.AndReturn(UrlopenResult(status, response, url=url,
216+
headers=response_headers))
175217

176218
return call
177219

@@ -304,3 +346,49 @@ def _normalize_lines(val):
304346
lines = [l.strip() + '\n' for l in val.splitlines(True)]
305347
return [l for i, l in enumerate(lines)
306348
if i <= 1 or not (lines[i - 1] == l == '\n')]
349+
350+
351+
class HandlerTest(TestCase):
  """Base test class for webapp2 request handlers.

  Activates App Engine's testbed and its API stubs for each test:
  http://code.google.com/appengine/docs/python/tools/localunittesting.html

  Attributes:
    current_user_id: string, user id the testbed environment is set up with
    current_user_email: string, email the testbed environment is set up with
    testbed: testbed.Testbed, activated in setUp() and deactivated in
      tearDown()
    taskqueue_stub: the testbed's task queue stub
    request: webapp2.Request for '/'
    response: webapp2.Response
    handler: webapp2.RequestHandler built from request and response
  """
  def setUp(self):
    super(HandlerTest, self).setUp()

    root_logger = logging.getLogger()
    root_logger.removeHandler(appengine_config.ereporter_logging_handler)

    os.environ['APPLICATION_ID'] = 'app_id'
    self.current_user_id = '123'
    # NOTE(review): this literal looks like an email-obfuscation placeholder
    # rather than a real address. confirm the intended value.
    self.current_user_email = '[email protected]'

    tb = testbed.Testbed()
    self.testbed = tb
    tb.setup_env(user_id=self.current_user_id,
                 user_email=self.current_user_email)
    tb.activate()

    tb.init_datastore_v3_stub(
      consistency_policy=datastore_stub_util.PseudoRandomHRConsistencyPolicy(
        probability=.5))
    tb.init_taskqueue_stub(root_path='.')
    for init_stub in (tb.init_user_stub, tb.init_mail_stub,
                      tb.init_memcache_stub, tb.init_logservice_stub):
      init_stub()

    # unofficial API, whee! this is so we can call
    # TaskQueueServiceStub.GetTasks() in tests. see
    # google/appengine/api/taskqueue/taskqueue_stub.py
    self.taskqueue_stub = tb.get_stub('taskqueue')

    self.request = webapp2.Request.blank('/')
    self.response = webapp2.Response()
    self.handler = webapp2.RequestHandler(self.request, self.response)

  def tearDown(self):
    # deactivate the testbed before the base class tears down its own state
    self.testbed.deactivate()
    super(HandlerTest, self).tearDown()

util.py

+66
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import inspect
1313
import json
1414
import logging
15+
import mimetypes
1516
import numbers
1617
import os
1718
import re
@@ -1001,3 +1002,68 @@ def call(url, *args, **kwargs):
10011002
def _prune(kwargs):
10021003
return {k: v for k, v in kwargs.items()
10031004
if k not in ('allow_redirects', 'headers', 'stream', 'timeout')}
1005+
1006+
1007+
def follow_redirects(url, cache=None, fail_cache_time_secs=60 * 60 * 24,  # a day
                     **kwargs):
  """Fetches a URL with HEAD, repeating if necessary to follow redirects.

  Also follows a Refresh response header, if the response has one.

  *Does not* raise an exception if any of the HTTP requests fail. Instead, a
  failure, including an HTTP error status, is logged and replaced with a
  placeholder response that has the requested URL and status code 499. If
  you care, be sure to check the returned response's status code!

  Args:
    url: string. If it has no scheme, http:// is prepended.
    cache: optional, a cache object to read and write resolved responses to.
      Must have get(key) and set_multi(mapping, time=...) methods. The
      response is stored under both 'R [original URL]' and 'R [final URL]'.
    fail_cache_time_secs: integer, how long to cache a failed resolution.
      Successful resolutions are cached with time=0.
    **kwargs: passed to requests.head()

  Returns:
    the requests.Response for the final request, or whatever the cache
    returned for this URL. The `url` attribute has the final URL. The
    content-type header is always set: if the response didn't have one, it's
    guessed from the URL, defaulting to text/html.
  """
  if cache is not None:
    cache_key = 'R ' + url
    resolved = cache.get(cache_key)
    if resolved is not None:
      return resolved

  # can't use urllib2 since it uses GET on redirect requests, even if i specify
  # HEAD for the initial request.
  # http://stackoverflow.com/questions/9967632
  try:
    # default scheme to http
    parsed = urlparse.urlparse(url)
    if not parsed.scheme:
      url = 'http://' + url
    resolved = requests_head(url, allow_redirects=True, **kwargs)
    resolved.raise_for_status()
    if resolved.url != url:
      logging.debug('Resolved %s to %s', url, resolved.url)
    cache_time = 0  # forever
  except (AssertionError, KeyboardInterrupt, SystemExit):
    # AssertionError was already re-raised here, presumably so that failed
    # test expectations aren't swallowed. interrupts and interpreter exit
    # must never be turned into a fake response either.
    raise
  except BaseException as e:
    logging.warning("Couldn't resolve URL %s : %s", url, e)
    resolved = requests.Response()
    resolved.url = url
    resolved.status_code = 499  # not standard. i made this up.
    cache_time = fail_cache_time_secs

  content_type = resolved.headers.get('content-type')
  if not content_type:
    guessed_type, _ = mimetypes.guess_type(resolved.url)
    resolved.headers['content-type'] = guessed_type or 'text/html'

  refresh = resolved.headers.get('refresh')
  if refresh:
    for part in refresh.split(';'):
      part = part.strip()
      # servers send both 'url=' and 'URL=', so match case insensitively
      if part[:4].lower() != 'url=':
        continue
      target = part[4:]
      # a page that refreshes to itself would otherwise recurse until the
      # interpreter's recursion limit, one HTTP request per level.
      if target and target not in (url, resolved.url):
        return follow_redirects(target, cache=cache,
                                fail_cache_time_secs=fail_cache_time_secs,
                                **kwargs)

  resolved.url = clean_url(resolved.url)
  if cache is not None:
    cache.set_multi({cache_key: resolved, 'R ' + resolved.url: resolved},
                    time=cache_time)
  return resolved

0 commit comments

Comments
 (0)