# SOFTWARE.
from services.command.base import BaseCommand
-from twisted.web.client import _parse, HTTPDownloader
+from services.command.exceptions import NotModifiedError
+from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader
from twisted.internet import defer, reactor
import services.config as config
self.name = "download"
self.d = defer.Deferred()
- def doCommand(self, state, url=None, *args, **kw):
+ def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw):
"""
Pass in a url. This will return a deferred that will
eventually call back to you with a (result, filename) pair to
where the url has been downloaded. It is your responsibility
to delete the file once you have finished with it.
"""
+ self.state = state
# for testing
if self.getTest(state) == "download_connection_refused":
url = "http://localhost:9091/something.html"
# except formencode.api.Invalid:
tmpfd, tempfilename = tempfile.mkstemp()
os.close(tmpfd)
- d = localDownloadPage(str(url), tempfilename)
+ d = localDownloadPage(str(url), tempfilename,
+ etag=etag, last_modified=last_modified)
# chain our own callback and error handler to return the
# filename and/or cleanup if the download fails
d.addCallback(self.downloadDone, tempfilename)
return self.d
def downloadDone(self, result, filename):
+ # Success callback for the download deferred. `result` is indexed as a
+ # dict with "etag" and "last_modified" keys; both values are copied into
+ # self.state under "download_etag" / "download_last_modified" before the
+ # command's own deferred (self.d) is fired with the temp file name.
+ etag = result["etag"]
+ last_modified = result["last_modified"]
print(" downloadDone")
+ print(" etag %s" % str(etag))
+ print(" last-modified %s" % str(last_modified))
+ self.state["download_etag"] = result["etag"]
+ self.state["download_last_modified"] = result["last_modified"]
self.d.callback(filename)
def downloadError(self, failure):
+ # Forward the failure unchanged to the command's own deferred (self.d).
self.d.errback(failure)
-# stolen from twisted.web.client so we can add a 307 handler - stupid
-# phik!
-def localDownloadPage(url, file, contextFactory=None, *args, **kwargs):
+# stolen from twisted.web.client so we can add a 307 handler (stupid
+# phik!) and a last-modified/etag handler
+
+def localDownloadPage(url, file, contextFactory=None, etag=None, last_modified=None, *args, **kwargs):
"""Download a web page to a file.
@param file: path to file on filesystem, or file-like object.
kwargs["headers"]["authorization"] = auth
else:
print(" not adding auth")
-
- factory = HTTPDownloader(url, file, *args, **kwargs)
- # REACH DOWN THE THROAT OF HTTPPageDownloader and HTTPPageGetter
- factory.protocol.handleStatus_307 = lambda self: self.handleStatus_301()
+ # set up our etag and last-modified headers
+ if etag:
+ if not kwargs.has_key("headers"):
+ kwargs["headers"] = dict()
+ print(" adding etag %s" % etag)
+ kwargs["headers"]["if-none-match"] = etag
+ else:
+ print(" not adding etag")
+
+ if last_modified:
+ if not kwargs.has_key("headers"):
+ kwargs["headers"] = dict()
+ print(" setting last modified")
+ kwargs["headers"]["if-modified-since"] = last_modified
+ else:
+ print(" not setting last modified")
+
+ factory = LocalDownloader(url, file, *args, **kwargs)
if scheme == 'https':
from twisted.internet import ssl
else:
reactor.connectTCP(host, port, factory)
return factory.deferred
+
+# Need to overload some methods in the HTTPPageDownloader class to support
+# 307 redirects and 304 (not modified) responses
+class LocalPageGetter(HTTPPageDownloader):
+    """Download protocol with extra handlers for 307 and 304 responses.
+
+    Installed as the ``protocol`` of ``LocalDownloader``.
+    """
+
+    def handleStatus_307(self):
+        """Treat a 307 exactly like a 301 by delegating to that handler."""
+        print(" got 307")
+        return self.handleStatus_301()
+
+    def handleStatus_304(self):
+        """Report a 304 to the factory as a ``NotModifiedError`` failure.
+
+        The factory's ``noPage`` is called with the wrapped error and the
+        connection is then closed.
+        """
+        # Imported locally: the module's visible imports use the
+        # ``from twisted... import`` form, which does not bind the bare name
+        # ``twisted``, so ``twisted.python.failure`` would raise NameError.
+        from twisted.python import failure
+
+        print(" got 304")
+        self.factory.noPage(failure.Failure(NotModifiedError(None)))
+        # NOTE(review): quietLoss presumably suppresses the connection-lost
+        # error report for this deliberate close -- confirm against Twisted.
+        self.quietLoss = 1
+        self.transport.loseConnection()
+
+# Overload some methods on the HTTPDownloader class to catch etag and
+# last-modified data
+class LocalDownloader(HTTPDownloader):
+    """HTTPDownloader that remembers the ETag and Last-Modified headers.
+
+    The captured values are handed to the caller when the download
+    finishes: the deferred fires with a dict holding the keys ``"val"``,
+    ``"etag"`` and ``"last_modified"``.
+    """
+
+    protocol = LocalPageGetter
+
+    def __init__(self, url, fileOrName,
+                 method='GET', postdata=None, headers=None,
+                 agent="Twisted client", supportPartial=0):
+        """Initialise the saved header slots, then the base downloader."""
+        self.saved_etag = None
+        self.saved_last_modified = None
+        HTTPDownloader.__init__(self, url, fileOrName,
+                                method, postdata, headers,
+                                agent, supportPartial)
+
+    def gotHeaders(self, headers):
+        """Record the etag / last-modified headers, then defer to the base.
+
+        Only the first value of each header is kept.
+        NOTE(review): assumes ``headers`` maps lower-cased header names to
+        lists of values, as the ``[0]`` indexing implies -- confirm.
+        """
+        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
+        if "etag" in headers:
+            self.saved_etag = headers["etag"][0]
+            print(" etag: %s" % self.saved_etag)
+        else:
+            print(" no etag")
+
+        if "last-modified" in headers:
+            self.saved_last_modified = headers["last-modified"][0]
+            print(" last-modified: %s" % self.saved_last_modified)
+        else:
+            print(" no last-modified")
+
+        return HTTPDownloader.gotHeaders(self, headers)
+
+    def pageEnd(self):
+        """Close the output file and fire the deferred with the result dict.
+
+        Does nothing when there is no open file.  An IOError raised while
+        closing is reported through the deferred's errback instead.
+        """
+        # Imported locally so ``failure`` is bound no matter how the rest of
+        # the module imports Twisted.
+        from twisted.python import failure
+
+        if not self.file:
+            return
+        try:
+            self.file.close()
+        except IOError:
+            self.deferred.errback(failure.Failure())
+            return
+        self.deferred.callback({"val": self.value,
+                                "etag": self.saved_etag,
+                                "last_modified": self.saved_last_modified})
+
+
+
+
+
+
from twisted.trial import unittest
from twisted.internet import defer
from twisted.internet import error
+from services.command.exceptions import NotModifiedError
import formencode
import os
d.addErrback(lambda f: f.trap(formencode.api.Invalid))
return d
+
+    def test_etag_hit(self):
+        """
+        Test what happens when we pass in an etag that matches.
+
+        The download is expected to fail with NotModifiedError.
+        """
+        c = DownloadCommand()
+
+        state = dict()
+        state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(state, etag="abc123")
+
+        # twisted.trial.unittest has no module-level fail(); use the test
+        # case's own method.  The errback below only traps NotModifiedError,
+        # so the failure raised here still fails the test.
+        d.addCallback(lambda x: self.fail("should give a 304"))
+        d.addErrback(lambda f: f.trap(NotModifiedError))
+
+        return d
+
+    def test_etag_miss(self):
+        """
+        A non-matching etag should produce a normal download whose
+        recorded etag is then checked by _downloadDoneCheckETag.
+        """
+        command = DownloadCommand()
+
+        # Kept on the instance so the callback can read what was recorded.
+        self.state = {"url": "http://localhost:9090/test/modified"}
+
+        pending = command.doCommand(self.state, etag="abc123x")
+        pending.addCallback(self._downloadDoneCheckETag)
+        return pending
+
+    def _downloadDoneCheckETag(self, filename):
+        """Check that a file name came back and the stored etag is abc123."""
+        recorded = self.state["download_etag"]
+        assert filename
+        print("filename %s" % filename)
+        print("etag %s" % recorded)
+        assert recorded == "abc123"
+
+    def test_last_modified_hit(self):
+        """
+        Test what happens when we pass in a last-modified date that matches.
+
+        The download is expected to fail with NotModifiedError.
+        """
+        c = DownloadCommand()
+
+        state = dict()
+        state["url"] = "http://localhost:9090/test/modified"
+
+        d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT")
+
+        # twisted.trial.unittest has no module-level fail(); use the test
+        # case's own method.  The errback below only traps NotModifiedError,
+        # so the failure raised here still fails the test.
+        d.addCallback(lambda x: self.fail("should give a 304"))
+        d.addErrback(lambda f: f.trap(NotModifiedError))
+
+        return d
+
+    def test_last_modified_miss(self):
+        """
+        A non-matching last-modified date should produce a normal download
+        whose recorded value is checked by _downloadDoneCheckLastModified.
+        """
+        command = DownloadCommand()
+
+        # Kept on the instance so the callback can read what was recorded.
+        self.state = {"url": "http://localhost:9090/test/modified"}
+
+        pending = command.doCommand(
+            self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT")
+        pending.addCallback(self._downloadDoneCheckLastModified)
+        return pending
+
+    def _downloadDoneCheckLastModified(self, filename):
+        """Check that a file name came back and the stored date matches."""
+        recorded = self.state["download_last_modified"]
+        assert filename
+        print("filename %s" % filename)
+        print("last_modified %s" % recorded)
+        assert recorded == "Mon, 03 Nov 2008 01:27:18 GMT"
+
--- /dev/null
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from turbogears import controllers, expose
+from cherrypy import request, response
+
+class TestController(controllers.Controller):
+    """Fixture controller serving a resource with a fixed ETag and date."""
+
+    @expose()
+    def modified(self):
+        """Answer 304 when either conditional request header matches.
+
+        The ETag and Last-Modified response headers are always set.
+        """
+        fixed_etag = "abc123"
+        fixed_date = "Mon, 03 Nov 2008 01:27:18 GMT"
+        sent = request.headers
+
+        if sent.get("if-none-match") == fixed_etag:
+            response.status = 304
+        response.headers["ETag"] = fixed_etag
+
+        if sent.get("if-modified-since") == fixed_date:
+            response.status = 304
+        response.headers["Last-Modified"] = fixed_date
+
+        return dict()