2008-11-12 Christopher Blizzard <blizzard@0xdeadbeef.com>
authorblizzard <blizzard@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Wed, 12 Nov 2008 23:44:34 +0000 (23:44 +0000)
committerblizzard <blizzard@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Wed, 12 Nov 2008 23:44:34 +0000 (23:44 +0000)
        * whoisi/test_controller.py: New test controller that is used by
        the etag support handling.

        * whoisi/controllers.py (Root): Add a test/ controller.

        * tests/twisted/network/test_download.py: New tests for etag and
        last-modified.

        * services/command/exceptions.py (NotModifiedError): New
        NotModifiedError that's thrown when a download gets a 304.

        * services/command/download.py: Add support for setting and
        handling etag changes in the http support code.  Not quite right
        yet, especially with handling redirects and etag/last-modified,
        but getting close.  You can set etag and last-modified via the
        DownloadCommand now.  If it gets a 304, it will throw a
        NotModifedError exception you can handle in your command error
        handler.  Is a lot cleaner approach than the old monkeypatching
        method.

git-svn-id: svn://trac.whoisi.com/whoisi/trunk@11 ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c

ChangeLog
services/command/download.py
services/command/exceptions.py
tests/twisted/network/test_download.py
whoisi/controllers.py
whoisi/test_controller.py [new file with mode: 0644]

index 488d244..a372a1b 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2008-11-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/test_controller.py: New test controller that is used by
+       the etag support handling.
+
+       * whoisi/controllers.py (Root): Add a test/ controller.
+
+       * tests/twisted/network/test_download.py: New tests for etag and
+       last-modified.
+
+       * services/command/exceptions.py (NotModifiedError): New
+       NotModifiedError that's thrown when a download gets a 304.
+
+       * services/command/download.py: Add support for setting and
+       handling etag changes in the http support code.  Not quite right
+       yet, especially with handling redirects and etag/last-modified,
+       but getting close.  You can set etag and last-modified via the
+       DownloadCommand now.  If it gets a 304, it will throw a
+       NotModifedError exception you can handle in your command error
+       handler.  Is a lot cleaner approach than the old monkeypatching
+       method.
+
 2008-10-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
 
        * services/command/feedparse.py (FeedUpdateDatabaseCommand.stupidEntryAlreadyThere):
index c0b5355..9ff4ae3 100644 (file)
@@ -21,7 +21,8 @@
 # SOFTWARE.
 
 from services.command.base import BaseCommand
-from twisted.web.client import _parse, HTTPDownloader
+from services.command.exceptions import NotModifiedError
+from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader
 from twisted.internet import defer, reactor
 
 import services.config as config
@@ -59,13 +60,14 @@ class DownloadCommand(BaseCommand):
         self.name = "download"
         self.d = defer.Deferred()
 
-    def doCommand(self, state, url=None, *args, **kw):
+    def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw):
         """
         Pass in a url.  This will return a deferred that will
         eventually call back to you with a (result, filename) pair to
         where the url has been downloaded.  It is your responsibility
         to delete the file once you have finished with it.
         """
+        self.state = state
         # for testing
         if self.getTest(state) == "download_connection_refused":
             url = "http://localhost:9091/something.html"
@@ -81,7 +83,8 @@ class DownloadCommand(BaseCommand):
             # except formencode.api.Invalid:
             tmpfd, tempfilename = tempfile.mkstemp()
             os.close(tmpfd)
-            d = localDownloadPage(str(url), tempfilename)
+            d = localDownloadPage(str(url), tempfilename,
+                                  etag=etag, last_modified=last_modified)
             # chain our own callback and error handler to return the
             # filename and/or cleanup if the download fails
             d.addCallback(self.downloadDone, tempfilename)
@@ -93,7 +96,13 @@ class DownloadCommand(BaseCommand):
         return self.d
 
     def downloadDone(self, result, filename):
+        etag = result["etag"]
+        last_modified = result["last_modified"]
         print("  downloadDone")
+        print("  etag %s" % str(etag))
+        print("  last-modified %s" % str(last_modified))
+        self.state["download_etag"] = result["etag"]
+        self.state["download_last_modified"] = result["last_modified"]
         self.d.callback(filename)
 
     def downloadError(self, failure):
@@ -101,9 +110,10 @@ class DownloadCommand(BaseCommand):
         self.d.errback(failure)
         
 
-# stolen from twisted.web.client so we can add a 307 handler - stupid
-# phik!
-def localDownloadPage(url, file, contextFactory=None, *args, **kwargs):
+# stolen from twisted.web.client so we can add a 307 handler (stupid
+# phik!) and a last-modified/etag handler
+
+def localDownloadPage(url, file, contextFactory=None, etag=None, last_modified=None, *args, **kwargs):
     """Download a web page to a file.
 
     @param file: path to file on filesystem, or file-like object.
@@ -135,11 +145,25 @@ def localDownloadPage(url, file, contextFactory=None, *args, **kwargs):
         kwargs["headers"]["authorization"] = auth
     else:
         print("  not adding auth")
-        
 
-    factory = HTTPDownloader(url, file, *args, **kwargs)
-    # REACH DOWN THE THROAT OF HTTPPageDownloader and HTTPPageGetter
-    factory.protocol.handleStatus_307 = lambda self: self.handleStatus_301()
+    # set up our etag and last-modified headers
+    if etag:
+        if not kwargs.has_key("headers"):
+            kwargs["headers"] = dict()
+        print("  adding etag %s" % etag)
+        kwargs["headers"]["if-none-match"] = etag
+    else:
+        print("  not adding etag")
+
+    if last_modified:
+        if not kwargs.has_key("headers"):
+            kwargs["headers"] = dict()
+        print("  setting last modified")
+        kwargs["headers"]["if-modified-since"] = last_modified
+    else:
+        print("  not setting last modified")
+
+    factory = LocalDownloader(url, file, *args, **kwargs)
 
     if scheme == 'https':
         from twisted.internet import ssl
@@ -149,3 +173,64 @@ def localDownloadPage(url, file, contextFactory=None, *args, **kwargs):
     else:
         reactor.connectTCP(host, port, factory)
     return factory.deferred
+
+# Need to overload some methods in the HTTPPageGetter class to support
+# etag, 307s and last-modified
+class LocalPageGetter(HTTPPageDownloader):
+    def handleStatus_307(self):
+        print("  got 307")
+        return self.handleStatus_301()
+
+    def handleStatus_304(self):
+        print("  got 304")
+        self.factory.noPage(twisted.python.failure.Failure(NotModifiedError(None)))
+        self.quietLoss = 1
+        self.transport.loseConnection()
+
+# Overload some methods on the HTTPDownloader class to catch etag and
+# last-modified data
+class LocalDownloader(HTTPDownloader):
+
+    protocol = LocalPageGetter
+
+    def __init__(self, url, fileOrName,
+                 method='GET', postdata=None, headers=None,
+                 agent="Twisted client", supportPartial=0):
+        self.saved_etag = None
+        self.saved_last_modified = None
+        return HTTPDownloader.__init__(self, url, fileOrName,
+                                       method, postdata, headers,
+                                       agent, supportPartial)
+
+    def gotHeaders(self, headers):
+        if headers.has_key("etag"):
+            self.saved_etag = headers["etag"][0]
+            print("  etag: %s" % self.saved_etag)
+        else:
+            print("  no etag")
+
+        if headers.has_key("last-modified"):
+            self.saved_last_modified = headers["last-modified"][0]
+            print("  last-modified: %s" % self.saved_last_modified)
+        else:
+            print("  no last-modified")
+
+        return HTTPDownloader.gotHeaders(self, headers)
+
+    def pageEnd(self):
+        if not self.file:
+            return
+        try:
+            self.file.close()
+        except IOError:
+            self.deferred.errback(failure.Failure())
+            return
+        self.deferred.callback({"val": self.value,
+                                "etag": self.saved_etag,
+                                "last_modified": self.saved_last_modified})
+
+
+
+
+
+
index 2d8d747..ee2f3d1 100644 (file)
@@ -30,6 +30,12 @@ class PageNotFoundError(Exception):
     def __str__(self):
         return repr(self.value)
 
+class NotModifiedError(Exception):
+    def __init__(self, value):
+        self.value = value
+    def __str__(self):
+        return repr(self.value)
+
 class FeedNotFoundError(Exception):
     def __init__(self, value):
         self.value = value
index 5b11b5a..ed55296 100644 (file)
@@ -2,6 +2,7 @@
 from twisted.trial import unittest
 from twisted.internet import defer
 from twisted.internet import error
+from services.command.exceptions import NotModifiedError
 
 import formencode
 import os
@@ -102,3 +103,80 @@ class TestDownload(unittest.TestCase):
         d.addErrback(lambda f: f.trap(formencode.api.Invalid))
 
         return d
+
+    def test_etag_hit(self):
+        """
+        Test what happens when we pass in a etag that matches.
+        """
+        c = DownloadCommand()
+
+        state = dict()
+        state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(state, etag="abc123")
+
+        d.addCallback(lambda x: unittest.fail("should give a 304"))
+        d.addErrback(lambda f: f.trap(NotModifiedError))
+
+        return d
+
+    def test_etag_miss(self):
+        """
+        Test what happens when we pass in a etag that misses
+        """
+        c = DownloadCommand()
+
+        self.state = dict()
+        self.state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(self.state, etag="abc123x")
+
+        d.addCallback(self._downloadDoneCheckETag)
+
+        return d
+
+    def _downloadDoneCheckETag(self, filename):
+        etag = self.state["download_etag"]
+        assert(filename)
+        print("filename %s" % filename)
+        print("etag %s" % etag)
+        assert(etag == "abc123")
+
+    def test_last_modified_hit(self):
+        """
+        Test what happens when we pass in a etag that matches.
+        """
+        c = DownloadCommand()
+
+        state = dict()
+        state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT")
+
+        d.addCallback(lambda x: unittest.fail("should give a 304"))
+        d.addErrback(lambda f: f.trap(NotModifiedError))
+
+        return d
+
+    def test_last_modified_miss(self):
+        """
+        Test what happens when we pass in a etag that misses
+        """
+        c = DownloadCommand()
+
+        self.state = dict()
+        self.state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT")
+
+        d.addCallback(self._downloadDoneCheckLastModified)
+
+        return d
+
+    def _downloadDoneCheckLastModified(self, filename):
+        last_modified = self.state["download_last_modified"]
+        assert(filename)
+        print("filename %s" % filename)
+        print("last_modified %s" % last_modified)
+        assert(last_modified == "Mon, 03 Nov 2008 01:27:18 GMT")
+        
index 21fba9b..32c5da6 100644 (file)
@@ -47,6 +47,7 @@ from whoisi.utils.names import fast_names_for_person
 from whoisi.utils.track import get_request_tracking
 from whoisi.utils.recommendations import get_recommendations
 from whoisi.api import ApiController
+from whoisi.test_controller import TestController
 import whoisi.utils.follow as follow
 # from whoisi import json
 
@@ -59,6 +60,7 @@ import re
 class Root(controllers.RootController):
 
     api = ApiController()
+    test = TestController()
 
     @expose(template="whoisi.templates.index")
     def index(self):
diff --git a/whoisi/test_controller.py b/whoisi/test_controller.py
new file mode 100644 (file)
index 0000000..1c8f8bb
--- /dev/null
@@ -0,0 +1,39 @@
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from turbogears import controllers, expose
+from cherrypy import request, response
+
+class TestController(controllers.Controller):
+    @expose()
+    def modified(self):
+        if request.headers.get("if-none-match", None) == "abc123":
+            response.status = 304
+
+        response.headers["ETag"] = "abc123"
+
+        if request.headers.get("if-modified-since", None) == "Mon, 03 Nov 2008 01:27:18 GMT":
+            response.status = 304
+
+        response.headers["Last-Modified"] = "Mon, 03 Nov 2008 01:27:18 GMT"
+        
+        return dict()