From 245f1225d953634f23e6d01ffcb74fc7bd2ce98e Mon Sep 17 00:00:00 2001 From: blizzard Date: Wed, 19 Nov 2008 01:42:09 +0000 Subject: [PATCH] 2008-11-18 Christopher Blizzard * test-ws.cfg: Enable base_url_filter.base_url to localhost:9090 for testing to generate proper redirects for tests. * whoisi/test_controller.py (TestController): Lots of new methods that generate redirects to test url handling. * tests/twisted/network/test_download.py: Lots of changes to support the new entity_url argument for the download command. (TestDownload.test_redirect): Test that tests working redirects. (TestDownload.test_redirect_too_many): Test that will make sure we throw an exception if there are too many redirects for a resource. * services/command/exceptions.py (TooManyRedirectsError): New exception when a resource has too many redirects. * services/command/download.py: More ongoing work to handle etag and last-modified headers. Also first steps to improve support for redirects. doCommand has been modified to require etag, last_modified and the entry_url to which the etag and last_modified apply. (The original url which might generate redirects might also not have the tags applied to it.) There is also a url stack that is saved as urls are followed. This can be used to match many possible urls to a single resource. (i.e. what feedburner does.) This will also top out at 5 redirects for a single resource and then throw a TooManyRedirectsError. git-svn-id: svn://trac.whoisi.com/whoisi/trunk@12 ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c --- ChangeLog | 28 +++++++++ services/command/download.py | 83 ++++++++++++++++++++++---- services/command/exceptions.py | 6 ++ test-ws.cfg | 2 + tests/twisted/network/test_download.py | 46 ++++++++++++-- whoisi/test_controller.py | 22 ++++++- 6 files changed, 170 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index a372a1b..a37117e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,31 @@ +2008-11-18 Christopher Blizzard + + * test-ws.cfg: Enable base_url_filter.base_url to localhost:9090 + for testing to generate proper redirects for tests. + + * whoisi/test_controller.py (TestController): Lots of new methods + that generate redirects to test url handling. + + * tests/twisted/network/test_download.py: Lots of changes to + support the new entity_url argument for the download command. + (TestDownload.test_redirect): Test that tests working redirects. + (TestDownload.test_redirect_too_many): Test that will make sure we + throw an exception if there are too many redirects for a resource. + + * services/command/exceptions.py (TooManyRedirectsError): New + exception when a resource has too many redirects. + + * services/command/download.py: More ongoing work to handle etag + and last-modified headers. Also first steps to improve support + for redirects. doCommand has been modified to require etag, + last_modified and the entry_url to which the etag and + last_modified apply. (The original url which might generate + redirects might also not have the tags applied to it.) There is + also a url stack that is saved as urls are followed. This can be + used to match many possible urls to a single resource. (i.e. what + feedburner does.) This will also top out at 5 redirects for a + single resource and then throw a TooManyRedirectsError. + 2008-11-12 Christopher Blizzard * whoisi/test_controller.py: New test controller that is used by diff --git a/services/command/download.py b/services/command/download.py index 9ff4ae3..15f44e0 100644 --- a/services/command/download.py +++ b/services/command/download.py @@ -21,8 +21,10 @@ # SOFTWARE. from services.command.base import BaseCommand -from services.command.exceptions import NotModifiedError +from services.command.exceptions import NotModifiedError, TooManyRedirectsError +from services.command.utils import resolve_relative_url from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader +from twisted.web import error from twisted.internet import defer, reactor import services.config as config @@ -59,8 +61,11 @@ class DownloadCommand(BaseCommand): BaseCommand.__init__(self) self.name = "download" self.d = defer.Deferred() + self.url_stack = [] - def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw): + def doCommand(self, state, url=None, + etag=None, last_modified=None, entity_url=None, + *args, **kw): """ Pass in a url. This will return a deferred that will eventually call back to you with a (result, filename) pair to @@ -68,16 +73,45 @@ class DownloadCommand(BaseCommand): to delete the file once you have finished with it. """ self.state = state - # for testing - if self.getTest(state) == "download_connection_refused": - url = "http://localhost:9091/something.html" - if self.getTest(state) == "download_404": - url = "http://localhost:9090/something.html" + self.etag = etag + self.last_modified = last_modified + self.entity_url = entity_url + try: # ugh, this should probably not be here like this if not url: url = str(state["url"]) print(" downloading %s" % url) + except Exception, e: + print(" exception during download start") + self.d.errback(twisted.python.failure.Failure(e)) + + self.startDownload(url) + + return self.d + + def startDownload(self, url): + # for testing + if self.getTest(self.state) == "download_connection_refused": + url = "http://localhost:9091/something.html" + if self.getTest(self.state) == "download_404": + url = "http://localhost:9090/something.html" + + # save this url on the url stack + self.url_stack.append(url) + + etag = None + last_modified = None + + # we only send along the etag and last_modified headers on the + # url if it matches the last body that we downloaded + if url == self.entity_url: + etag = self.etag + last_modified = self.last_modified + else: + print(" url doesn't match entity url") + + try: # check the URL to make sure it's valid formencode.validators.URL(add_http=False).to_python(url) # except formencode.api.Invalid: @@ -101,12 +135,34 @@ class DownloadCommand(BaseCommand): print(" downloadDone") print(" etag %s" % str(etag)) print(" last-modified %s" % str(last_modified)) + print(" url stack %s" % str(self.url_stack)) self.state["download_etag"] = result["etag"] self.state["download_last_modified"] = result["last_modified"] + self.state["download_url_stack"] = self.url_stack self.d.callback(filename) def downloadError(self, failure): - print(" downloadError") + if failure.check(error.PageRedirect): + print(" page redirect") + print(" location: %s" % failure.value.location) + print(" status: %s" % failure.value.status) + + # prevent us from following too many redirects + if len(self.url_stack) == 5: + self.d.errback(twisted.python.failure.Failure(TooManyRedirectsError(None))) + return + + # resolve a relative redirect + location = resolve_relative_url(failure.value.location, + self.url_stack[-1]) + + print(" final location %s" % location) + + # ...and restart our download + self.startDownload(location) + return + + print(" downloadError %s" % str(failure)) self.d.errback(failure) @@ -198,9 +254,14 @@ class LocalDownloader(HTTPDownloader): agent="Twisted client", supportPartial=0): self.saved_etag = None self.saved_last_modified = None - return HTTPDownloader.__init__(self, url, fileOrName, - method, postdata, headers, - agent, supportPartial) + retval = HTTPDownloader.__init__(self, url, fileOrName, + method, postdata, headers, + agent, supportPartial) + + # We override this because HTTPDownloader doesn't pass along + # followRedirect to the constructor for HTTPClientFactory + self.protocol.followRedirect = 0 + return retval def gotHeaders(self, headers): if headers.has_key("etag"): diff --git a/services/command/exceptions.py b/services/command/exceptions.py index ee2f3d1..3e96f8a 100644 --- a/services/command/exceptions.py +++ b/services/command/exceptions.py @@ -36,6 +36,12 @@ class NotModifiedError(Exception): def __str__(self): return repr(self.value) +class TooManyRedirectsError(Exception): + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) + class FeedNotFoundError(Exception): def __init__(self, value): self.value = value diff --git a/test-ws.cfg b/test-ws.cfg index f2d1cd3..c34c302 100644 --- a/test-ws.cfg +++ b/test-ws.cfg @@ -32,6 +32,8 @@ sqlobject.dburi="mysql://root@127.0.0.1:9999/whoisi?charset=utf8" # SERVER # Some server parameters that you may want to tweak +base_url_filter.on = True +base_url_filter.base_url = "http://localhost:9090/" server.socket_port=9090 # Enable the debug output at the end on pages. diff --git a/tests/twisted/network/test_download.py b/tests/twisted/network/test_download.py index ed55296..4280f5f 100644 --- a/tests/twisted/network/test_download.py +++ b/tests/twisted/network/test_download.py @@ -2,7 +2,7 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.internet import error -from services.command.exceptions import NotModifiedError +from services.command.exceptions import NotModifiedError, TooManyRedirectsError import formencode import os @@ -113,7 +113,8 @@ class TestDownload(unittest.TestCase): state = dict() state["url"] = "http://localhost:9090/test/modified" - d = c.doCommand(state, etag="abc123") + d = c.doCommand(state, etag="abc123", + entity_url="http://localhost:9090/test/modified") d.addCallback(lambda x: unittest.fail("should give a 304")) d.addErrback(lambda f: f.trap(NotModifiedError)) @@ -129,7 +130,8 @@ class TestDownload(unittest.TestCase): self.state = dict() self.state["url"] = "http://localhost:9090/test/modified" - d = c.doCommand(self.state, etag="abc123x") + d = c.doCommand(self.state, etag="abc123x", + entity_url="http://localhost:9090/test/modified") d.addCallback(self._downloadDoneCheckETag) @@ -151,7 +153,9 @@ class TestDownload(unittest.TestCase): state = dict() state["url"] = "http://localhost:9090/test/modified" - d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT") + d = c.doCommand(state, + last_modified="Mon, 03 Nov 2008 01:27:18 GMT", + entity_url="http://localhost:9090/test/modified") d.addCallback(lambda x: unittest.fail("should give a 304")) d.addErrback(lambda f: f.trap(NotModifiedError)) @@ -167,7 +171,9 @@ class TestDownload(unittest.TestCase): self.state = dict() self.state["url"] = "http://localhost:9090/test/modified" - d = c.doCommand(self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT") + d = c.doCommand(self.state, + last_modified="Mon, 03 Nov 2008 01:27:19 GMT", + entity_url="http://localhost:9090/test/modified") d.addCallback(self._downloadDoneCheckLastModified) @@ -180,3 +186,33 @@ class TestDownload(unittest.TestCase): print("last_modified %s" % last_modified) assert(last_modified == "Mon, 03 Nov 2008 01:27:18 GMT") + def test_redirect(self): + """ + Test what happens when we get a redirect. + """ + c = DownloadCommand() + + self.state = dict() + self.state["url"] = "http://localhost:9090/test/redirect1" + + d = c.doCommand(self.state) + + d.addCallback(self._downloadDone) + + return d + + def test_redirect_too_many(self): + """ + Test what happens when we get a redirect. + """ + c = DownloadCommand() + + self.state = dict() + self.state["url"] = "http://localhost:9090/test/redirect0" + + d = c.doCommand(self.state) + + d.addCallback(lambda x: unittest.fail("should get too many redirects")) + d.addErrback(lambda f: f.trap(TooManyRedirectsError)) + + return d diff --git a/whoisi/test_controller.py b/whoisi/test_controller.py index 1c8f8bb..74277b6 100644 --- a/whoisi/test_controller.py +++ b/whoisi/test_controller.py @@ -20,10 +20,30 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from turbogears import controllers, expose +from turbogears import controllers, expose, redirect from cherrypy import request, response class TestController(controllers.Controller): + @expose() + def redirect0(self): + raise redirect("/test/redirect1") + + @expose() + def redirect1(self): + raise redirect("/test/redirect2") + + @expose() + def redirect2(self): + raise redirect("/test/redirect3") + + @expose() + def redirect3(self): + raise redirect("/test/redirect4") + + @expose() + def redirect4(self): + raise redirect("/test/modified") + @expose() def modified(self): if request.headers.get("if-none-match", None) == "abc123": -- 2.30.2