* test-ws.cfg: Enable base_url_filter.base_url to localhost:9090
for testing to generate proper redirects for tests.
* whoisi/test_controller.py (TestController): Lots of new methods
that generate redirects to test url handling.
* tests/twisted/network/test_download.py: Lots of changes to
support the new entity_url argument for the download command.
(TestDownload.test_redirect): Test that tests working redirects.
(TestDownload.test_redirect_too_many): Test that will make sure we
throw an exception if there are too many redirects for a resource.
* services/command/exceptions.py (TooManyRedirectsError): New
exception when a resource has too many redirects.
* services/command/download.py: More ongoing work to handle etag
and last-modified headers. Also first steps to improve support
for redirects. doCommand has been modified to require etag,
last_modified and the entry_url to which the etag and
last_modified apply. (The original url which might generate
redirects might also not have the tags applied to it.) There is
also a url stack that is saved as urls are followed. This can be
used to match many possible urls to a single resource. (i.e. what
feedburner does.) This will also top out at 5 redirects for a
single resource and then throw a TooManyRedirectsError.
git-svn-id: svn://trac.whoisi.com/whoisi/trunk@12
ae879524-a8bd-4c4c-a5ea-
74d2e5fc5a2c
+2008-11-18 Christopher Blizzard <blizzard@0xdeadbeef.com>
+
+ * test-ws.cfg: Enable base_url_filter.base_url to localhost:9090
+ for testing to generate proper redirects for tests.
+
+ * whoisi/test_controller.py (TestController): Lots of new methods
+ that generate redirects to test url handling.
+
+ * tests/twisted/network/test_download.py: Lots of changes to
+ support the new entity_url argument for the download command.
+ (TestDownload.test_redirect): Test that tests working redirects.
+ (TestDownload.test_redirect_too_many): Test that will make sure we
+ throw an exception if there are too many redirects for a resource.
+
+ * services/command/exceptions.py (TooManyRedirectsError): New
+ exception when a resource has too many redirects.
+
+ * services/command/download.py: More ongoing work to handle etag
+ and last-modified headers. Also first steps to improve support
+ for redirects. doCommand has been modified to require etag,
+ last_modified and the entry_url to which the etag and
+ last_modified apply. (The original url which might generate
+ redirects might also not have the tags applied to it.) There is
+ also a url stack that is saved as urls are followed. This can be
+ used to match many possible urls to a single resource. (i.e. what
+ feedburner does.) This will also top out at 5 redirects for a
+ single resource and then throw a TooManyRedirectsError.
+
2008-11-12 Christopher Blizzard <blizzard@0xdeadbeef.com>
* whoisi/test_controller.py: New test controller that is used by
# SOFTWARE.
from services.command.base import BaseCommand
-from services.command.exceptions import NotModifiedError
+from services.command.exceptions import NotModifiedError, TooManyRedirectsError
+from services.command.utils import resolve_relative_url
from twisted.web.client import _parse, HTTPDownloader, HTTPPageDownloader
+from twisted.web import error
from twisted.internet import defer, reactor
import services.config as config
BaseCommand.__init__(self)
self.name = "download"
self.d = defer.Deferred()
+ self.url_stack = []
- def doCommand(self, state, url=None, etag=None, last_modified=None,*args, **kw):
+ def doCommand(self, state, url=None,
+ etag=None, last_modified=None, entity_url=None,
+ *args, **kw):
"""
Pass in a url. This will return a deferred that will
eventually call back to you with a (result, filename) pair to
to delete the file once you have finished with it.
"""
self.state = state
- # for testing
- if self.getTest(state) == "download_connection_refused":
- url = "http://localhost:9091/something.html"
- if self.getTest(state) == "download_404":
- url = "http://localhost:9090/something.html"
+ self.etag = etag
+ self.last_modified = last_modified
+ self.entity_url = entity_url
+
try:
# ugh, this should probably not be here like this
if not url:
url = str(state["url"])
print(" downloading %s" % url)
+ except Exception, e:
+ print(" exception during download start")
+ self.d.errback(twisted.python.failure.Failure(e))
+
+ self.startDownload(url)
+
+ return self.d
+
+ def startDownload(self, url):
+ # for testing
+ if self.getTest(self.state) == "download_connection_refused":
+ url = "http://localhost:9091/something.html"
+ if self.getTest(self.state) == "download_404":
+ url = "http://localhost:9090/something.html"
+
+ # save this url on the url stack
+ self.url_stack.append(url)
+
+ etag = None
+ last_modified = None
+
+ # we only send along the etag and last_modified headers on the
+ # url if it matches the last body that we downloaded
+ if url == self.entity_url:
+ etag = self.etag
+ last_modified = self.last_modified
+ else:
+ print(" url doesn't match entity url")
+
+ try:
# check the URL to make sure it's valid
formencode.validators.URL(add_http=False).to_python(url)
# except formencode.api.Invalid:
print(" downloadDone")
print(" etag %s" % str(etag))
print(" last-modified %s" % str(last_modified))
+ print(" url stack %s" % str(self.url_stack))
self.state["download_etag"] = result["etag"]
self.state["download_last_modified"] = result["last_modified"]
+ self.state["download_url_stack"] = self.url_stack
self.d.callback(filename)
def downloadError(self, failure):
- print(" downloadError")
+ if failure.check(error.PageRedirect):
+ print(" page redirect")
+ print(" location: %s" % failure.value.location)
+ print(" status: %s" % failure.value.status)
+
+ # prevent us from following too many redirects
+ if len(self.url_stack) == 5:
+ self.d.errback(twisted.python.failure.Failure(TooManyRedirectsError(None)))
+ return
+
+ # resolve a relative redirect
+ location = resolve_relative_url(failure.value.location,
+ self.url_stack[-1])
+
+ print(" final location %s" % location)
+
+ # ...and restart our download
+ self.startDownload(location)
+ return
+
+ print(" downloadError %s" % str(failure))
self.d.errback(failure)
agent="Twisted client", supportPartial=0):
self.saved_etag = None
self.saved_last_modified = None
- return HTTPDownloader.__init__(self, url, fileOrName,
- method, postdata, headers,
- agent, supportPartial)
+ retval = HTTPDownloader.__init__(self, url, fileOrName,
+ method, postdata, headers,
+ agent, supportPartial)
+
+ # We override this because HTTPDownloader doesn't pass along
+ # followRedirect to the constructor for HTTPClientFactory
+ self.protocol.followRedirect = 0
+ return retval
def gotHeaders(self, headers):
if headers.has_key("etag"):
def __str__(self):
return repr(self.value)
+class TooManyRedirectsError(Exception):
+ def __init__(self, value):
+ self.value = value
+ def __str__(self):
+ return repr(self.value)
+
class FeedNotFoundError(Exception):
def __init__(self, value):
self.value = value
# SERVER
# Some server parameters that you may want to tweak
+base_url_filter.on = True
+base_url_filter.base_url = "http://localhost:9090/"
server.socket_port=9090
# Enable the debug output at the end on pages.
from twisted.trial import unittest
from twisted.internet import defer
from twisted.internet import error
-from services.command.exceptions import NotModifiedError
+from services.command.exceptions import NotModifiedError, TooManyRedirectsError
import formencode
import os
state = dict()
state["url"] = "http://localhost:9090/test/modified"
- d = c.doCommand(state, etag="abc123")
+ d = c.doCommand(state, etag="abc123",
+ entity_url="http://localhost:9090/test/modified")
d.addCallback(lambda x: unittest.fail("should give a 304"))
d.addErrback(lambda f: f.trap(NotModifiedError))
self.state = dict()
self.state["url"] = "http://localhost:9090/test/modified"
- d = c.doCommand(self.state, etag="abc123x")
+ d = c.doCommand(self.state, etag="abc123x",
+ entity_url="http://localhost:9090/test/modified")
d.addCallback(self._downloadDoneCheckETag)
state = dict()
state["url"] = "http://localhost:9090/test/modified"
- d = c.doCommand(state, last_modified="Mon, 03 Nov 2008 01:27:18 GMT")
+ d = c.doCommand(state,
+ last_modified="Mon, 03 Nov 2008 01:27:18 GMT",
+ entity_url="http://localhost:9090/test/modified")
d.addCallback(lambda x: unittest.fail("should give a 304"))
d.addErrback(lambda f: f.trap(NotModifiedError))
self.state = dict()
self.state["url"] = "http://localhost:9090/test/modified"
- d = c.doCommand(self.state, last_modified="Mon, 03 Nov 2008 01:27:19 GMT")
+ d = c.doCommand(self.state,
+ last_modified="Mon, 03 Nov 2008 01:27:19 GMT",
+ entity_url="http://localhost:9090/test/modified")
d.addCallback(self._downloadDoneCheckLastModified)
print("last_modified %s" % last_modified)
assert(last_modified == "Mon, 03 Nov 2008 01:27:18 GMT")
+ def test_redirect(self):
+ """
+ Test what happens when we get a redirect.
+ """
+ c = DownloadCommand()
+
+ self.state = dict()
+ self.state["url"] = "http://localhost:9090/test/redirect1"
+
+ d = c.doCommand(self.state)
+
+ d.addCallback(self._downloadDone)
+
+ return d
+
+ def test_redirect_too_many(self):
+ """
+ Test what happens when we get a redirect.
+ """
+ c = DownloadCommand()
+
+ self.state = dict()
+ self.state["url"] = "http://localhost:9090/test/redirect0"
+
+ d = c.doCommand(self.state)
+
+ d.addCallback(lambda x: unittest.fail("should get too many redirects"))
+ d.addErrback(lambda f: f.trap(TooManyRedirectsError))
+
+ return d
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-from turbogears import controllers, expose
+from turbogears import controllers, expose, redirect
from cherrypy import request, response
class TestController(controllers.Controller):
+ @expose()
+ def redirect0(self):
+ raise redirect("/test/redirect1")
+
+ @expose()
+ def redirect1(self):
+ raise redirect("/test/redirect2")
+
+ @expose()
+ def redirect2(self):
+ raise redirect("/test/redirect3")
+
+ @expose()
+ def redirect3(self):
+ raise redirect("/test/redirect4")
+
+ @expose()
+ def redirect4(self):
+ raise redirect("/test/modified")
+
@expose()
def modified(self):
if request.headers.get("if-none-match", None) == "abc123":