2008-11-18 Christopher Blizzard <blizzard@0xdeadbeef.com>
authorblizzard <blizzard@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Wed, 19 Nov 2008 01:42:09 +0000 (01:42 +0000)
committerblizzard <blizzard@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Wed, 19 Nov 2008 01:42:09 +0000 (01:42 +0000)
        * test-ws.cfg: Enable base_url_filter.base_url to localhost:9090
        for testing to generate proper redirects for tests.

        * whoisi/test_controller.py (TestController): Lots of new methods
        that generate redirects to test url handling.

        * tests/twisted/network/test_download.py: Lots of changes to
        support the new entity_url argument for the download command.
        (TestDownload.test_redirect): Test that tests working redirects.
        (TestDownload.test_redirect_too_many): Test that will make sure we
        throw an exception if there are too many redirects for a resource.

        * services/command/exceptions.py (TooManyRedirectsError): New
        exception when a resource has too many redirects.

        * services/command/download.py: More ongoing work to handle etag
        and last-modified headers.  Also first steps to improve support
        for redirects. doCommand has been modified to require etag,
        last_modified and the entry_url to which the etag and
        last_modified apply.  (The original url which might generate
        redirects might also not have the tags applied to it.)  There is
        also a url stack that is saved as urls are followed.  This can be
        used to match many possible urls to a single resource.  (i.e. what
        feedburner does.)  This will also top out at 5 redirects for a
        single resource and then throw a TooManyRedirectsError.

git-svn-id: svn://trac.whoisi.com/whoisi/trunk@12 ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c

ChangeLog
services/command/download.py
services/command/exceptions.py
test-ws.cfg
tests/twisted/network/test_download.py
whoisi/test_controller.py

index a372a1b..a37117e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,31 @@
+2008-11-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * test-ws.cfg: Enable base_url_filter.base_url to localhost:9090
+       for testing to generate proper redirects for tests.
+
+       * whoisi/test_controller.py (TestController): Lots of new methods
+       that generate redirects to test url handling.
+
+       * tests/twisted/network/test_download.py: Lots of changes to
+       support the new entity_url argument for the download command.
+       (TestDownload.test_redirect): Test that tests working redirects.
+       (TestDownload.test_redirect_too_many): Test that will make sure we
+       throw an exception if there are too many redirects for a resource.
+
+       * services/command/exceptions.py (TooManyRedirectsError): New
+       exception when a resource has too many redirects.
+
+       * services/command/download.py: More ongoing work to handle etag
+       and last-modified headers.  Also first steps to improve support
+       for redirects. doCommand has been modified to require etag,
+       last_modified and the entry_url to which the etag and
+       last_modified apply.  (The original url which might generate
+       redirects might also not have the tags applied to it.)  There is
+       also a url stack that is saved as urls are followed.  This can be
+       used to match many possible urls to a single resource.  (i.e. what
+       feedburner does.)  This will also top out at 5 redirects for a
+       single resource and then throw a TooManyRedirectsError.
+
 2008-11-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
 
        * whoisi/test_controller.py: New test controller that is used by
index 9ff4ae3..15f44e0 100644 (file)
 # SOFTWARE.
 
 from services.command.base import BaseCommand
-from services.command.exceptions import NotModifiedError
+from services.command.exceptions import NotModifiedError, TooManyRedirectsError
+from services.command.utils import resolve_relative_url
 from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader
+from twisted.web import error
 from twisted.internet import defer, reactor
 
 import services.config as config
@@ -59,8 +61,11 @@ class DownloadCommand(BaseCommand):
         BaseCommand.__init__(self)
         self.name = "download"
         self.d = defer.Deferred()
+        self.url_stack = []
 
-    def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw):
+    def doCommand(self, state, url=None,
+                  etag=None, last_modified=None, entity_url=None,
+                  *args, **kw):
         """
         Pass in a url.  This will return a deferred that will
         eventually call back to you with a (result, filename) pair to
@@ -68,16 +73,45 @@ class DownloadCommand(BaseCommand):
         to delete the file once you have finished with it.
         """
         self.state = state
-        # for testing
-        if self.getTest(state) == "download_connection_refused":
-            url = "http://localhost:9091/something.html"
-        if self.getTest(state) == "download_404":
-            url = "http://localhost:9090/something.html"
+        self.etag = etag
+        self.last_modified = last_modified
+        self.entity_url = entity_url
+
         try:
             # ugh, this should probably not be here like this
             if not url:
                 url = str(state["url"])
             print("  downloading %s" % url)
+        except Exception, e:
+            print("  exception during download start")
+            self.d.errback(twisted.python.failure.Failure(e))
+
+        self.startDownload(url)
+
+        return self.d
+
+    def startDownload(self, url):
+        # for testing
+        if self.getTest(self.state) == "download_connection_refused":
+            url = "http://localhost:9091/something.html"
+        if self.getTest(self.state) == "download_404":
+            url = "http://localhost:9090/something.html"
+
+        # save this url on the url stack
+        self.url_stack.append(url)
+
+        etag = None
+        last_modified = None
+
+        # we only send along the etag and last_modified headers on the
+        # url if it matches the last body that we downloaded
+        if url == self.entity_url:
+            etag = self.etag
+            last_modified = self.last_modified
+        else:
+            print("  url doesn't match entity url")
+
+        try:
             # check the URL to make sure it's valid
             formencode.validators.URL(add_http=False).to_python(url)        
             # except formencode.api.Invalid:
@@ -101,12 +135,34 @@ class DownloadCommand(BaseCommand):
         print("  downloadDone")
         print("  etag %s" % str(etag))
         print("  last-modified %s" % str(last_modified))
+        print("  url stack %s" % str(self.url_stack))
         self.state["download_etag"] = result["etag"]
         self.state["download_last_modified"] = result["last_modified"]
+        self.state["download_url_stack"] = self.url_stack
         self.d.callback(filename)
 
     def downloadError(self, failure):
-        print("  downloadError")
+        if failure.check(error.PageRedirect):
+            print("  page redirect")
+            print("  location: %s" % failure.value.location)
+            print("  status: %s" % failure.value.status)
+
+            # prevent us from following too many redirects
+            if len(self.url_stack) == 5:
+                self.d.errback(twisted.python.failure.Failure(TooManyRedirectsError(None)))
+                return
+
+            # resolve a relative redirect
+            location = resolve_relative_url(failure.value.location,
+                                            self.url_stack[-1])
+
+            print("  final location %s" % location)
+
+            # ...and restart our download
+            self.startDownload(location)
+            return
+
+        print("  downloadError %s" % str(failure))
         self.d.errback(failure)
         
 
@@ -198,9 +254,14 @@ class LocalDownloader(HTTPDownloader):
                  agent="Twisted client", supportPartial=0):
         self.saved_etag = None
         self.saved_last_modified = None
-        return HTTPDownloader.__init__(self, url, fileOrName,
-                                       method, postdata, headers,
-                                       agent, supportPartial)
+        retval = HTTPDownloader.__init__(self, url, fileOrName,
+                                         method, postdata, headers,
+                                         agent, supportPartial)
+
+        # We override this because HTTPDownloader doesn't pass along
+        # followRedirect to the constructor for HTTPClientFactory
+        self.protocol.followRedirect = 0
+        return retval
 
     def gotHeaders(self, headers):
         if headers.has_key("etag"):
index ee2f3d1..3e96f8a 100644 (file)
@@ -36,6 +36,12 @@ class NotModifiedError(Exception):
     def __str__(self):
         return repr(self.value)
 
+class TooManyRedirectsError(Exception):
+    def __init__(self, value):
+        self.value = value
+    def __str__(self):
+        return repr(self.value)
+
 class FeedNotFoundError(Exception):
     def __init__(self, value):
         self.value = value
index f2d1cd3..c34c302 100644 (file)
@@ -32,6 +32,8 @@ sqlobject.dburi="mysql://root@127.0.0.1:9999/whoisi?charset=utf8"
 # SERVER
 
 # Some server parameters that you may want to tweak
+base_url_filter.on = True
+base_url_filter.base_url = "http://localhost:9090/"
 server.socket_port=9090
 
 # Enable the debug output at the end on pages.
index ed55296..4280f5f 100644 (file)
@@ -2,7 +2,7 @@
 from twisted.trial import unittest
 from twisted.internet import defer
 from twisted.internet import error
-from services.command.exceptions import NotModifiedError
+from services.command.exceptions import NotModifiedError, TooManyRedirectsError
 
 import formencode
 import os
@@ -113,7 +113,8 @@ class TestDownload(unittest.TestCase):
         state = dict()
         state["url"] = "http://localhost:9090/test/modified"
 
-        d = c.doCommand(state, etag="abc123")
+        d = c.doCommand(state, etag="abc123",
+                        entity_url="http://localhost:9090/test/modified")
 
         d.addCallback(lambda x: unittest.fail("should give a 304"))
         d.addErrback(lambda f: f.trap(NotModifiedError))
@@ -129,7 +130,8 @@ class TestDownload(unittest.TestCase):
         self.state = dict()
         self.state["url"] = "http://localhost:9090/test/modified"
 
-        d = c.doCommand(self.state, etag="abc123x")
+        d = c.doCommand(self.state, etag="abc123x",
+                        entity_url="http://localhost:9090/test/modified")
 
         d.addCallback(self._downloadDoneCheckETag)
 
@@ -151,7 +153,9 @@ class TestDownload(unittest.TestCase):
         state = dict()
         state["url"] = "http://localhost:9090/test/modified"
 
-        d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT")
+        d = c.doCommand(state,
+                        last_modified="Mon, 03 Nov 2008 01:27:18 GMT",
+                        entity_url="http://localhost:9090/test/modified")
 
         d.addCallback(lambda x: unittest.fail("should give a 304"))
         d.addErrback(lambda f: f.trap(NotModifiedError))
@@ -167,7 +171,9 @@ class TestDownload(unittest.TestCase):
         self.state = dict()
         self.state["url"] = "http://localhost:9090/test/modified"
 
-        d = c.doCommand(self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT")
+        d = c.doCommand(self.state,
+                        last_modified="Mon, 03 Nov 2008 01:27:19 GMT",
+                        entity_url="http://localhost:9090/test/modified")
 
         d.addCallback(self._downloadDoneCheckLastModified)
 
@@ -180,3 +186,33 @@ class TestDownload(unittest.TestCase):
         print("last_modified %s" % last_modified)
         assert(last_modified == "Mon, 03 Nov 2008 01:27:18 GMT")
         
+    def test_redirect(self):
+        """
+        Test what happens when we get a redirect.
+        """
+        c = DownloadCommand()
+
+        self.state = dict()
+        self.state["url"] = "http://localhost:9090/test/redirect1"
+
+        d = c.doCommand(self.state)
+
+        d.addCallback(self._downloadDone)
+
+        return d
+
+    def test_redirect_too_many(self):
+        """
+        Test what happens when we get a redirect.
+        """
+        c = DownloadCommand()
+
+        self.state = dict()
+        self.state["url"] = "http://localhost:9090/test/redirect0"
+
+        d = c.doCommand(self.state)
+
+        d.addCallback(lambda x: unittest.fail("should get too many redirects"))
+        d.addErrback(lambda f: f.trap(TooManyRedirectsError))
+
+        return d
index 1c8f8bb..74277b6 100644 (file)
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from turbogears import controllers, expose
+from turbogears import controllers, expose, redirect
 from cherrypy import request, response
 
 class TestController(controllers.Controller):
     @expose()
+    def redirect0(self):
+        raise redirect("/test/redirect1")
+
+    @expose()
+    def redirect1(self):
+        raise redirect("/test/redirect2")
+
+    @expose()
+    def redirect2(self):
+        raise redirect("/test/redirect3")
+
+    @expose()
+    def redirect3(self):
+        raise redirect("/test/redirect4")
+
+    @expose()
+    def redirect4(self):
+        raise redirect("/test/modified")
+
+    @expose()
     def modified(self):
         if request.headers.get("if-none-match", None) == "abc123":
             response.status = 304