Skip to content

Commit c194aa3

Browse files
committed
move follow_redirects and testutil to webutil (in snarfed/webutil@46eabb0)
1 parent e74cdfc commit c194aa3

9 files changed

+15
-215
lines changed

granary/source.py

+1-65
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,6 @@
4040
'invite': 'invited',
4141
}
4242

43-
FAILED_RESOLVE_URL_CACHE_TIME = 60 * 60 * 24 # a day
44-
4543
# maps lower case string short name to Source subclass. populated by SourceMeta.
4644
sources = {}
4745

@@ -497,7 +495,7 @@ def original_post_discovery(activity, domains=None, cache=None,
497495
# check for redirect and add their final urls
498496
redirects = {} # maps final URL to original URL for redirects
499497
for url in list(candidates):
500-
resolved = follow_redirects(url, cache=cache, **kwargs)
498+
resolved = util.follow_redirects(url, cache=cache, **kwargs)
501499
if (resolved.url != url and
502500
resolved.headers.get('content-type', '').startswith('text/html')):
503501
redirects[resolved.url] = url
@@ -767,65 +765,3 @@ def _html_to_text(self, html):
767765
return '\n'.join(
768766
# strip trailing whitespace that html2text adds to ends of some lines
769767
line.rstrip() for line in h.unescape(h.handle(html)).splitlines())
770-
771-
def follow_redirects(url, cache=None, **kwargs):
772-
"""Fetches a URL with HEAD, repeating if necessary to follow redirects.
773-
774-
Caches resolved URLs in memcache by default. *Does not* raise an exception if
775-
any of the HTTP requests fail, just returns the failed response. If you care,
776-
be sure to check the returned response's status code!
777-
778-
Args:
779-
url: string
780-
cache: optional, a cache object to read and write resolved URLs to. Must
781-
have get(key) and set(key, value, time=...) methods. Stores
782-
'R [original URL]' in key, final URL in value.
783-
**kwargs: passed to requests.head()
784-
785-
Returns:
786-
the requests.Response for the final request
787-
"""
788-
if cache is not None:
789-
cache_key = 'R ' + url
790-
resolved = cache.get(cache_key)
791-
if resolved is not None:
792-
return resolved
793-
794-
# can't use urllib2 since it uses GET on redirect requests, even if i specify
795-
# HEAD for the initial request.
796-
# http://stackoverflow.com/questions/9967632
797-
try:
798-
# default scheme to http
799-
parsed = urlparse.urlparse(url)
800-
if not parsed.scheme:
801-
url = 'http://' + url
802-
resolved = util.requests_head(url, allow_redirects=True, **kwargs)
803-
resolved.raise_for_status()
804-
if resolved.url != url:
805-
logging.debug('Resolved %s to %s', url, resolved.url)
806-
cache_time = 0 # forever
807-
except AssertionError:
808-
raise
809-
except BaseException, e:
810-
logging.warning("Couldn't resolve URL %s : %s", url, e)
811-
resolved = requests.Response()
812-
resolved.url = url
813-
resolved.status_code = 499 # not standard. i made this up.
814-
cache_time = FAILED_RESOLVE_URL_CACHE_TIME
815-
816-
content_type = resolved.headers.get('content-type')
817-
if not content_type:
818-
type, _ = mimetypes.guess_type(resolved.url)
819-
resolved.headers['content-type'] = type or 'text/html'
820-
821-
refresh = resolved.headers.get('refresh')
822-
if refresh:
823-
for part in refresh.split(';'):
824-
if part.strip().startswith('url='):
825-
return follow_redirects(part.strip()[4:], cache=cache, **kwargs)
826-
827-
resolved.url = util.clean_url(resolved.url)
828-
if cache is not None:
829-
cache.set_multi({cache_key: resolved, 'R ' + resolved.url: resolved},
830-
time=cache_time)
831-
return resolved

granary/test/test_facebook.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,12 @@
99
import urllib
1010
import urllib2
1111

12+
from oauth_dropins.webutil import testutil
1213
from oauth_dropins.webutil import util
1314

1415
from granary import appengine_config
1516
from granary import facebook
1617
from granary import source
17-
from granary import testutil
1818

1919

2020
# test data

granary/test/test_flickr.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,11 @@
88
import urllib
99
import urllib2
1010

11-
from oauth_dropins.webutil import util
1211
from oauth_dropins import appengine_config
12+
from oauth_dropins.webutil import testutil
13+
from oauth_dropins.webutil import util
1314
from granary import flickr
1415
from granary import source
15-
from granary import testutil
1616

1717
__author__ = ['Kyle Mahan <[email protected]>']
1818

granary/test/test_instagram.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,11 @@
1313
import urllib2
1414
import httplib2
1515

16+
from oauth_dropins.webutil import testutil
17+
from oauth_dropins.webutil import util
18+
1619
from granary import instagram
1720
from granary import source
18-
from granary import testutil
19-
from oauth_dropins.webutil import util
2021

2122

2223
def tag_uri(name):

granary/test/test_microformats2.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@
88

99
import re
1010

11+
from oauth_dropins.webutil import testutil
1112
import mf2py
1213

1314
from granary import microformats2
14-
from granary import testutil
1515

1616

1717
class Microformats2Test(testutil.HandlerTest):

granary/test/test_source.py

+3-44
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,15 @@
66

77
import copy
88

9+
from oauth_dropins.webutil import testutil
10+
from oauth_dropins.webutil import util
11+
912
from granary import facebook
1013
from granary import googleplus
1114
from granary import instagram
1215
from granary import source
1316
from granary.source import Source
14-
from granary import testutil
1517
from granary import twitter
16-
from oauth_dropins.webutil import util
17-
1818
import test_facebook
1919
import test_googleplus
2020

@@ -358,47 +358,6 @@ def test_sources_global(self):
358358
self.assertEquals(instagram.Instagram, source.sources['instagram'])
359359
self.assertEquals(twitter.Twitter, source.sources['twitter'])
360360

361-
def test_follow_redirects(self):
362-
for i in range(2):
363-
self.expect_requests_head('http://will/redirect',
364-
redirected_url='http://final/url')
365-
self.mox.ReplayAll()
366-
367-
cache = util.CacheDict()
368-
self.assert_equals(
369-
'http://final/url',
370-
source.follow_redirects('http://will/redirect', cache=cache).url)
371-
372-
self.assertEquals('http://final/url', cache['R http://will/redirect'].url)
373-
374-
# another call without cache should refetch
375-
self.assert_equals(
376-
'http://final/url',
377-
source.follow_redirects('http://will/redirect').url)
378-
379-
# another call with cache shouldn't refetch
380-
self.assert_equals(
381-
'http://final/url',
382-
source.follow_redirects('http://will/redirect', cache=cache).url)
383-
384-
def test_follow_redirects_with_refresh_header(self):
385-
headers = {'x': 'y'}
386-
self.expect_requests_head('http://will/redirect', headers=headers,
387-
response_headers={'refresh': '0; url=http://refresh'})
388-
self.expect_requests_head('http://refresh', headers=headers,
389-
redirected_url='http://final')
390-
391-
self.mox.ReplayAll()
392-
cache = util.CacheDict()
393-
self.assert_equals('http://final',
394-
source.follow_redirects('http://will/redirect', cache=cache,
395-
headers=headers).url)
396-
397-
def test_follow_redirects_defaults_scheme_to_http(self):
398-
self.expect_requests_head('http://foo/bar', redirected_url='http://final')
399-
self.mox.ReplayAll()
400-
self.assert_equals('http://final', source.follow_redirects('foo/bar').url)
401-
402361
def test_post_id(self):
403362
self.assertEquals('1', self.source.post_id('http://x/y/1'))
404363
self.assertEquals('1', self.source.post_id('http://x/y/1/'))

granary/test/test_testdata.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,10 @@
66
import logging
77
import os
88

9+
from oauth_dropins.webutil import testutil
10+
from oauth_dropins.webutil import util
11+
912
from granary import microformats2
10-
from granary import testutil
1113

1214

1315
def filepairs(ext1, ext2s):

granary/test/test_twitter.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@
1212
import urllib2
1313

1414
from oauth_dropins import appengine_config
15+
from oauth_dropins.webutil import testutil
1516
from oauth_dropins.webutil import util
1617

1718
from granary import microformats2
1819
from granary import source
19-
from granary import testutil
2020
from granary import twitter
2121

2222
__author__ = ['Ryan Barrett <[email protected]>']

granary/testutil.py

-98
This file was deleted.

0 commit comments

Comments
 (0)