From d457464990e2fc6ecd867f79c919732832038b99 Mon Sep 17 00:00:00 2001 From: blizzard Date: Wed, 12 Nov 2008 23:44:34 +0000 Subject: [PATCH] 2008-11-12 Christopher Blizzard * whoisi/test_controller.py: New test controller that is used by the etag support handling. * whoisi/controllers.py (Root): Add a test/ controller. * tests/twisted/network/test_download.py: New tests for etag and last-modified. * services/command/exceptions.py (NotModifiedError): New NotModifiedError that's thrown when a download gets a 304. * services/command/download.py: Add support for setting and handling etag changes in the http support code. Not quite right yet, especially with handling redirects and etag/last-modified, but getting close. You can set etag and last-modified via the DownloadCommand now. If it gets a 304, it will throw a NotModifedError exception you can handle in your command error handler. Is a lot cleaner approach than the old monkeypatching method. git-svn-id: svn://trac.whoisi.com/whoisi/trunk@11 ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c --- ChangeLog | 22 ++++++ services/command/download.py | 105 ++++++++++++++++++++++--- services/command/exceptions.py | 6 ++ tests/twisted/network/test_download.py | 78 ++++++++++++++++++ whoisi/controllers.py | 2 + whoisi/test_controller.py | 39 +++++++++ 6 files changed, 242 insertions(+), 10 deletions(-) create mode 100644 whoisi/test_controller.py diff --git a/ChangeLog b/ChangeLog index 488d244..a372a1b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2008-11-12 Christopher Blizzard + + * whoisi/test_controller.py: New test controller that is used by + the etag support handling. + + * whoisi/controllers.py (Root): Add a test/ controller. + + * tests/twisted/network/test_download.py: New tests for etag and + last-modified. + + * services/command/exceptions.py (NotModifiedError): New + NotModifiedError that's thrown when a download gets a 304. + + * services/command/download.py: Add support for setting and + handling etag changes in the http support code. Not quite right + yet, especially with handling redirects and etag/last-modified, + but getting close. You can set etag and last-modified via the + DownloadCommand now. If it gets a 304, it will throw a + NotModifedError exception you can handle in your command error + handler. Is a lot cleaner approach than the old monkeypatching + method. + 2008-10-18 Christopher Blizzard * services/command/feedparse.py (FeedUpdateDatabaseCommand.stupidEntryAlreadyThere): diff --git a/services/command/download.py b/services/command/download.py index c0b5355..9ff4ae3 100644 --- a/services/command/download.py +++ b/services/command/download.py @@ -21,7 +21,8 @@ # SOFTWARE. from services.command.base import BaseCommand -from twisted.web.client import _parse, HTTPDownloader +from services.command.exceptions import NotModifiedError +from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader from twisted.internet import defer, reactor import services.config as config @@ -59,13 +60,14 @@ class DownloadCommand(BaseCommand): self.name = "download" self.d = defer.Deferred() - def doCommand(self, state, url=None, *args, **kw): + def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw): """ Pass in a url. This will return a deferred that will eventually call back to you with a (result, filename) pair to where the url has been downloaded. It is your responsibility to delete the file once you have finished with it. """ + self.state = state # for testing if self.getTest(state) == "download_connection_refused": url = "http://localhost:9091/something.html" @@ -81,7 +83,8 @@ class DownloadCommand(BaseCommand): # except formencode.api.Invalid: tmpfd, tempfilename = tempfile.mkstemp() os.close(tmpfd) - d = localDownloadPage(str(url), tempfilename) + d = localDownloadPage(str(url), tempfilename, + etag=etag, last_modified=last_modified) # chain our own callback and error handler to return the # filename and/or cleanup if the download fails d.addCallback(self.downloadDone, tempfilename) @@ -93,7 +96,13 @@ class DownloadCommand(BaseCommand): return self.d def downloadDone(self, result, filename): + etag = result["etag"] + last_modified = result["last_modified"] print(" downloadDone") + print(" etag %s" % str(etag)) + print(" last-modified %s" % str(last_modified)) + self.state["download_etag"] = result["etag"] + self.state["download_last_modified"] = result["last_modified"] self.d.callback(filename) def downloadError(self, failure): @@ -101,9 +110,10 @@ class DownloadCommand(BaseCommand): self.d.errback(failure) -# stolen from twisted.web.client so we can add a 307 handler - stupid -# phik! -def localDownloadPage(url, file, contextFactory=None, *args, **kwargs): +# stolen from twisted.web.client so we can add a 307 handler (stupid +# phik!) and a last-modified/etag handler + +def localDownloadPage(url, file, contextFactory=None, etag=None, last_modified=None, *args, **kwargs): """Download a web page to a file. @param file: path to file on filesystem, or file-like object. @@ -135,11 +145,25 @@ def localDownloadPage(url, file, contextFactory=None, *args, **kwargs): kwargs["headers"]["authorization"] = auth else: print(" not adding auth") - - factory = HTTPDownloader(url, file, *args, **kwargs) - # REACH DOWN THE THROAT OF HTTPPageDownloader and HTTPPageGetter - factory.protocol.handleStatus_307 = lambda self: self.handleStatus_301() + # set up our etag and last-modified headers + if etag: + if not kwargs.has_key("headers"): + kwargs["headers"] = dict() + print(" adding etag %s" % etag) + kwargs["headers"]["if-none-match"] = etag + else: + print(" not adding etag") + + if last_modified: + if not kwargs.has_key("headers"): + kwargs["headers"] = dict() + print(" setting last modified") + kwargs["headers"]["if-modified-since"] = last_modified + else: + print(" not setting last modified") + + factory = LocalDownloader(url, file, *args, **kwargs) if scheme == 'https': from twisted.internet import ssl @@ -149,3 +173,64 @@ def localDownloadPage(url, file, contextFactory=None, *args, **kwargs): else: reactor.connectTCP(host, port, factory) return factory.deferred + +# Need to overload some methods in the HTTPPageGetter class to support +# etag, 307s and last-modified +class LocalPageGetter(HTTPPageDownloader): + def handleStatus_307(self): + print(" got 307") + return self.handleStatus_301() + + def handleStatus_304(self): + print(" got 304") + self.factory.noPage(twisted.python.failure.Failure(NotModifiedError(None))) + self.quietLoss = 1 + self.transport.loseConnection() + +# Overload some methods on the HTTPDownloader class to catch etag and +# last-modified data +class LocalDownloader(HTTPDownloader): + + protocol = LocalPageGetter + + def __init__(self, url, fileOrName, + method='GET', postdata=None, headers=None, + agent="Twisted client", supportPartial=0): + self.saved_etag = None + self.saved_last_modified = None + return HTTPDownloader.__init__(self, url, fileOrName, + method, postdata, headers, + agent, supportPartial) + + def gotHeaders(self, headers): + if headers.has_key("etag"): + self.saved_etag = headers["etag"][0] + print(" etag: %s" % self.saved_etag) + else: + print(" no etag") + + if headers.has_key("last-modified"): + self.saved_last_modified = headers["last-modified"][0] + print(" last-modified: %s" % self.saved_last_modified) + else: + print(" no last-modified") + + return HTTPDownloader.gotHeaders(self, headers) + + def pageEnd(self): + if not self.file: + return + try: + self.file.close() + except IOError: + self.deferred.errback(failure.Failure()) + return + self.deferred.callback({"val": self.value, + "etag": self.saved_etag, + "last_modified": self.saved_last_modified}) + + + + + + diff --git a/services/command/exceptions.py b/services/command/exceptions.py index 2d8d747..ee2f3d1 100644 --- a/services/command/exceptions.py +++ b/services/command/exceptions.py @@ -30,6 +30,12 @@ class PageNotFoundError(Exception): def __str__(self): return repr(self.value) +class NotModifiedError(Exception): + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) + class FeedNotFoundError(Exception): def __init__(self, value): self.value = value diff --git a/tests/twisted/network/test_download.py b/tests/twisted/network/test_download.py index 5b11b5a..ed55296 100644 --- a/tests/twisted/network/test_download.py +++ b/tests/twisted/network/test_download.py @@ -2,6 +2,7 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.internet import error +from services.command.exceptions import NotModifiedError import formencode import os @@ -102,3 +103,80 @@ class TestDownload(unittest.TestCase): d.addErrback(lambda f: f.trap(formencode.api.Invalid)) return d + + def test_etag_hit(self): + """ + Test what happens when we pass in a etag that matches. + """ + c = DownloadCommand() + + state = dict() + state["url"] = "http://localhost:9090/test/modified" + + d = c.doCommand(state, etag="abc123") + + d.addCallback(lambda x: unittest.fail("should give a 304")) + d.addErrback(lambda f: f.trap(NotModifiedError)) + + return d + + def test_etag_miss(self): + """ + Test what happens when we pass in a etag that misses + """ + c = DownloadCommand() + + self.state = dict() + self.state["url"] = "http://localhost:9090/test/modified" + + d = c.doCommand(self.state, etag="abc123x") + + d.addCallback(self._downloadDoneCheckETag) + + return d + + def _downloadDoneCheckETag(self, filename): + etag = self.state["download_etag"] + assert(filename) + print("filename %s" % filename) + print("etag %s" % etag) + assert(etag == "abc123") + + def test_last_modified_hit(self): + """ + Test what happens when we pass in a etag that matches. + """ + c = DownloadCommand() + + state = dict() + state["url"] = "http://localhost:9090/test/modified" + + d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT") + + d.addCallback(lambda x: unittest.fail("should give a 304")) + d.addErrback(lambda f: f.trap(NotModifiedError)) + + return d + + def test_last_modified_miss(self): + """ + Test what happens when we pass in a etag that misses + """ + c = DownloadCommand() + + self.state = dict() + self.state["url"] = "http://localhost:9090/test/modified" + + d = c.doCommand(self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT") + + d.addCallback(self._downloadDoneCheckLastModified) + + return d + + def _downloadDoneCheckLastModified(self, filename): + last_modified = self.state["download_last_modified"] + assert(filename) + print("filename %s" % filename) + print("last_modified %s" % last_modified) + assert(last_modified == "Mon, 03 Nov 2008 01:27:18 GMT") + diff --git a/whoisi/controllers.py b/whoisi/controllers.py index 21fba9b..32c5da6 100644 --- a/whoisi/controllers.py +++ b/whoisi/controllers.py @@ -47,6 +47,7 @@ from whoisi.utils.names import fast_names_for_person from whoisi.utils.track import get_request_tracking from whoisi.utils.recommendations import get_recommendations from whoisi.api import ApiController +from whoisi.test_controller import TestController import whoisi.utils.follow as follow # from whoisi import json @@ -59,6 +60,7 @@ import re class Root(controllers.RootController): api = ApiController() + test = TestController() @expose(template="whoisi.templates.index") def index(self): diff --git a/whoisi/test_controller.py b/whoisi/test_controller.py new file mode 100644 index 0000000..1c8f8bb --- /dev/null +++ b/whoisi/test_controller.py @@ -0,0 +1,39 @@ +# Copyright (c) 2007-2008 Christopher Blizzard +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from turbogears import controllers, expose +from cherrypy import request, response + +class TestController(controllers.Controller): + @expose() + def modified(self): + if request.headers.get("if-none-match", None) == "abc123": + response.status = 304 + + response.headers["ETag"] = "abc123" + + if request.headers.get("if-modified-since", None) == "Mon, 03 Nov 2008 01:27:18 GMT": + response.status = 304 + + response.headers["Last-Modified"] = "Mon, 03 Nov 2008 01:27:18 GMT" + + return dict() -- 2.30.2